{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "4e7f6cca-3e9a-4b24-b6f9-17795b8c10b8",
   "metadata": {},
   "source": [
    "## 数据合并\n",
    "### 1.轴向堆叠数据\n",
    "- 当使用concat()函数合并时，若是设置axis=1，且join=outer,\n",
    "- concat() 函数可以沿着一条轴将多个对象进行堆叠，其使用的方式类似数据库中的数据表合并"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "1ff7dbe9-d714-42e8-9702-9a669f507503",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>C1</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>C3</td>\n",
       "      <td>D3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A    B   C   D\n",
       "0   A0   B0  C0  D0\n",
       "1   A0   B0  C0  D2\n",
       "2   A1   B1  C1  D2\n",
       "3  NaN  NaN  C3  D3"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df1 = pd.DataFrame({'A': ['A0', 'A0', 'A1'],\n",
    "                   'B': ['B0', 'B0', 'B1']})\n",
    "df2 = pd.DataFrame({'C': ['C0', 'C0', 'C1', 'C3'],\n",
    "                   'D': ['D0', 'D2', 'D2', 'D3']})\n",
    "#横向堆叠合并df1和df2,采用外连接的方式\n",
    "pd.concat([df1, df2], join='outer', axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d9a75dc1-c898-4d97-b726-2b0e3847f4e8",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "e186f33a-0f9f-49a4-9c25-8d0879a0b709",
   "metadata": {},
   "source": [
    "### 2.纵向堆叠与内连接\n",
    "- 当使用concat()函数合并时，如果设置axis=0, join='inner',则表示使用纵向堆叠与内连接的方式合并"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8cfac922-7eee-4fae-8a91-a45574409871",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "df1 = pd.DataFrame({'A': ['A0', 'A1', 'A2'],\n",
    "                   'B': ['B0', 'B1', 'B2'],\n",
    "                   'C': ['C0', 'C1', 'C2']})\n",
    "df2 = pd.DataFrame({'B': ['B3', 'B4', 'B5'],\n",
    "                   'C': ['C3', 'C4', 'C5'],\n",
    "                   'D': ['D3', 'D4', 'D5']})"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a3f80aa8-3e21-4830-9cb0-b65cb98a111a",
   "metadata": {},
   "source": [
    "## 主键合并数据\n",
    "- 【重要】merge()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "a9b1d359-68bb-48a8-80e5-34502c30ab29",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key</th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>K0</td>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>K1</td>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>K2</td>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  key   A   B   C   D\n",
       "0  K0  A0  B0  C0  D0\n",
       "1  K1  A1  B1  C1  D1\n",
       "2  K2  A2  B2  C2  D2"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#使用merge()函数，将left表与right表进行合并\n",
    "import pandas as pd\n",
    "left = pd.DataFrame({'key': ['K0', 'K1', 'K2'],\n",
    "                    'A': ['A0', 'A1', 'A2'],\n",
    "                    'B': ['B0', 'B1', 'B2']})\n",
    "right = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],\n",
    "                    'C': ['C0', 'C1', 'C2', 'C3'],\n",
    "                    'D': ['D0', 'D1', 'D2', 'D3']})\n",
    "pd.merge(left, right, on='key')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "ae3f00f5-db50-4ea5-a8a6-0bcb386f8226",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key</th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>K0</td>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>K2</td>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  key   A   B   C   D\n",
       "0  K0  A0  B0  C0  D0\n",
       "1  K2  A2  B2  C2  D2"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "left = pd.DataFrame({'key': ['K0', 'K1', 'K2'],\n",
    "                    'A': ['A0', 'A1', 'A2'],\n",
    "                    'B': ['B0', 'B1', 'B2']})\n",
    "right = pd.DataFrame({'key': ['K0', 'K5', 'K2', 'K4'],\n",
    "                    'B': ['B0', 'B1', 'B2', 'B3'],\n",
    "                    'C': ['C0', 'C1', 'C2', 'C3'],\n",
    "                    'D': ['D0', 'D1', 'D2', 'D3']})\n",
    "pd.merge(left, right, on=['key', 'B'])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "07333ffc-f1de-434c-86b7-4ecad8474c1f",
   "metadata": {},
   "source": [
    "外连接"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1a976dae-696e-4b5b-ba70-5f8f409a73bc",
   "metadata": {},
   "source": [
    "左外连接，左连接：先满足左表，后满足右表"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d880f12b-213c-4cbf-81fb-c92623cb9c9f",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "baf9775c-f1da-45bf-8485-72e80beb941d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A    B    C    D\n",
       "0   A0   B0  NaN  NaN\n",
       "1   A1   B1  NaN  NaN\n",
       "2   A2   B2  NaN  NaN\n",
       "a  NaN  NaN   C0   D0\n",
       "b  NaN  NaN   C1   D1\n",
       "c  NaN  NaN   C2   D2"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],\n",
    "                    'B': ['B0', 'B1', 'B2']}, index=[0, 1, 2])\n",
    "right = pd.DataFrame({'C': ['C0', 'C1', 'C2'],\n",
    "                    'D': ['D0', 'D1', 'D2']}, index=['a', 'b', 'c'])\n",
    "#因为两张表的数据没有重叠部分，所以需要这样编写\n",
    "pd.merge(left, right, how='outer', left_index=True, right_index=True) "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b57366b9-a3b5-4d3a-bad6-ebb6a5d3533a",
   "metadata": {},
   "source": [
    "### 根据行索引合并数据\n",
    "- join()通过行索引或指定列来连接DataFrame, 语法格式：\n",
    "- join(other, on=None, how='left', lsuffix='', rsuffix='', sort=False)\n",
    "- on: 用于连接名\n",
    "- how: 可以从['left', 'right', 'outer', 'inner']中任意选一个，默认使用left的方式\n",
    "- Isuffix: 接收字符串，用于在左侧重叠的列名后添加后缀名\n",
    "- rsuffix: 接收字符串，用于在右侧重叠的列名后添加后缀名\n",
    "- sort: "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "8625f276-73b1-4689-a95c-7aec60199e75",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A    B    C    D\n",
       "a   A0   B0  NaN  NaN\n",
       "b   A1   B1  NaN  NaN\n",
       "c  NaN  NaN   C0   D0\n",
       "d  NaN  NaN   C1   D1"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "left = pd.DataFrame({'A': ['A0', 'A1'],\n",
    "                    'B': ['B0', 'B1']}, index=['a', 'b'])\n",
    "right = pd.DataFrame({'C': ['C0', 'C1'],\n",
    "                    'D': ['D0', 'D1']}, index=['c', 'd'])\n",
    "\n",
    "left.join(right, how='outer')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f8bddda0-1fe2-4f09-97e0-04f27a394229",
   "metadata": {},
   "source": [
    "- 上述代码中，创建了两个DataFrame对象left与right, 然后使用join()将这两个对象合并，然后再使用how参数指定连接的方式，合并后缺失的数据使用NaN填充"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "13eccad6-ffc1-4eaf-83e1-2b5e60c7f4bf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>key</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>K0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>K1</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>K2</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B key   C   D\n",
       "0  A0  B0  K0  C0  D0\n",
       "1  A1  B1  K1  C1  D1\n",
       "2  A2  B2  K2  C2  D2"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],\n",
    "                    'B': ['B0', 'B1', 'B2'], \n",
    "                    'key': ['K0', 'K1', 'K2']}, index=[0, 1, 2])\n",
    "right = pd.DataFrame({'C': ['C0', 'C1', 'C2'],\n",
    "                    'D': ['D0', 'D1', 'D2']}, index=['K0', 'K1', 'K2'])\n",
    "\n",
    "# on 参数指定连接的列名\n",
    "left.join(right, how='left', on='key')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1bde5cb5-90d7-4f99-9156-a15989f979fd",
   "metadata": {},
   "source": [
    "- 上述代码中，创建了两个DataFrame对象left与right, 然后在join()中设置了连接方式和连接列名"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "634cd0bf-ba90-469d-96d8-52868482eaef",
   "metadata": {},
   "source": [
    "## 合并重叠数据\n",
    "- 在处理数据的过程中，当一个DataFrame对象中出现了缺失数据，而对于这些缺失数据，我们希望可以使用其他DataFrame对象中的数据进行填充，这时可以使用combine_first()\n",
    "- combine_first(other)\n",
    "- other: 用于接收填充缺失值的DataFrame对象\n",
    "- 尽管left表与right表中的行索引顺序不同，当用right表中的数据替换left表中的NaN值时，替换数据与缺失数据的索引位置仍然是相同的。例如，left表中位于第0行第A列"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "154e0767-3702-45d4-99d2-9595d2c7ef3e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>key</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "      <td>K0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A0</td>\n",
       "      <td>B1</td>\n",
       "      <td>K1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A1</td>\n",
       "      <td>D2</td>\n",
       "      <td>K2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A2</td>\n",
       "      <td>B3</td>\n",
       "      <td>K3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B key\n",
       "0  C1  D1  K0\n",
       "1  A0  B1  K1\n",
       "2  A1  D2  K2\n",
       "3  A2  B3  K3"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from numpy import NAN\n",
    "left = pd.DataFrame({'A': [np.nan, 'A0', 'A1', 'A2'],\n",
    "                     'B': [np.nan, 'B1',np.nan, 'B3'],\n",
    "                    'key': ['K0', 'K1', 'K2', 'K3']})\n",
    "right = pd.DataFrame({'A': ['C0', 'C1', 'C2'],\n",
    "                    'B': ['D0', 'D1', 'D2']},index=[1, 0, 2])\n",
    "#用combine_frist()将right表中的数据填充到left表缺失的部分\n",
    "left.combine_first(right)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2b5eb0ab-9ae0-4156-ad14-c07e977a7edc",
   "metadata": {},
   "source": [
    "注意，"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d7c1d41e-62d7-4c01-a824-977d0b35b3c2",
   "metadata": {},
   "source": [
    "## 数据重塑\n",
    "- 在Pandas中，大多数数据是以便于操作的DataFrame的形式展现的，这样很容易地获取每行或每列的数据，不过有时候，需要将DataFrame对象转换为Series对象，为此，Pandas提供了数据重塑的一些功能，包括重塑层次化索引和轴向转换，用于转换一个表格或向量结构\n",
    "- ### 重塑层次化索引\n",
    "- Pandas中重塑层次化索引的操作主要是\n",
    "- stack() 将数据的列索引转换为行索引\n",
    "- DataFrame.stack(level=-1, dropna=True)\n",
    "- level: 操作的内层索引，若设为0，表示操作外层索引，默认-1\n",
    "- dropa: 表示是否将旋转后的缺失值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "f4ff50d5-60f0-460c-bf56-81bb4357635d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0  A    A0\n",
      "   B    B0\n",
      "1  A    A1\n",
      "   B    B1\n",
      "2  A    A2\n",
      "   B    B2\n",
      "dtype: object\n",
      "<class 'pandas.core.series.Series'>\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df = pd.DataFrame({'A': ['A0', 'A1', 'A2'],\n",
    "                    'B': ['B0', 'B1', 'B2']})\n",
    "#将df 进行重塑\n",
    "result = df.stack()\n",
    "print(result)\n",
    "\n",
    "print(type(result))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "71dbc327-54cd-42e8-a486-5bc238a659a8",
   "metadata": {},
   "source": [
    "- 上述代码中，首先创建了一个DataFRAME类的对象df,然后让df对象调用stack()进行重塑，表明df对象的列索引会转换成行索引，从输出的结果来看，result对象具有两层行索引\n",
    "- DataFrame对象通过stack()已经转换成一个Series对象"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7c2d20e5-b768-4dc3-869b-8d59d7c323ba",
   "metadata": {},
   "source": [
    "unstack(): 将数据的行索引转换为列索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "0dbb9a16-d891-44a1-bed6-c59d4c7bce8f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0  A    A0\n",
      "   B    B0\n",
      "1  A    A1\n",
      "   B    B1\n",
      "2  A    A2\n",
      "   B    B2\n",
      "dtype: object\n",
      "<class 'pandas.core.series.Series'>\n",
      "    A   B\n",
      "0  A0  B0\n",
      "1  A1  B1\n",
      "2  A2  B2\n",
      "<class 'pandas.core.frame.DataFrame'>\n"
     ]
    }
   ],
   "source": [
    "# 将上一个示例中的Series对象“恢复原样”，转变为DataFrame对象\n",
    "import pandas as pd\n",
    "df = pd.DataFrame({'A': ['A0', 'A1', 'A2'],\n",
    "                    'B': ['B0', 'B1', 'B2']})\n",
    "#将df 进行重塑\n",
    "result = df.stack()\n",
    "print(result)\n",
    "\n",
    "print(type(result))\n",
    "\n",
    "result_new = result.unstack()\n",
    "print(result_new)\n",
    "print(type(result_new))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8e83122d-79c5-4342-a9ff-5c702d702b8f",
   "metadata": {},
   "source": [
    "除此之外，stack()和unstack()还可以在 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "0833747a-2d42-4725-b1c6-5384320310ee",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "      一楼      二楼    \n",
      "     A教室 B教室 A教室 B教室\n",
      "男生人数  26  20  22  26\n",
      "女生人数  30  25  24  20\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Windows\\Temp\\ipykernel_6624\\4205994747.py:8: FutureWarning: The previous implementation of stack is deprecated and will be removed in a future version of pandas. See the What's New notes for pandas 2.1.0 for details. Specify future_stack=True to adopt the new implementation and silence this warning.\n",
      "  df.stack()\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>一楼</th>\n",
       "      <th>二楼</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">男生人数</th>\n",
       "      <th>A教室</th>\n",
       "      <td>26</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B教室</th>\n",
       "      <td>20</td>\n",
       "      <td>26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">女生人数</th>\n",
       "      <th>A教室</th>\n",
       "      <td>30</td>\n",
       "      <td>24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B教室</th>\n",
       "      <td>25</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          一楼  二楼\n",
       "男生人数 A教室  26  22\n",
       "     B教室  20  26\n",
       "女生人数 A教室  30  24\n",
       "     B教室  25  20"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "df = pd.DataFrame(np.array([[26,20,22,26],[30,25,24,20]]),\n",
    "                  index=['男生人数', '女生人数'],\n",
    "                 columns=[['一楼', '一楼', '二楼', '二楼'],\n",
    "                         ['A教室', 'B教室','A教室','B教室']])\n",
    "print(df)\n",
    "df.stack()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "a667ee71-4746-40da-940f-d54cead2c0f3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "      一楼      二楼    \n",
      "     A教室 B教室 A教室 B教室\n",
      "男生人数  26  20  22  26\n",
      "女生人数  30  25  24  20\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Windows\\Temp\\ipykernel_6624\\923779331.py:8: FutureWarning: The previous implementation of stack is deprecated and will be removed in a future version of pandas. See the What's New notes for pandas 2.1.0 for details. Specify future_stack=True to adopt the new implementation and silence this warning.\n",
      "  df.stack(level=0)    #旋转外层索引\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>A教室</th>\n",
       "      <th>B教室</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">男生人数</th>\n",
       "      <th>一楼</th>\n",
       "      <td>26</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>二楼</th>\n",
       "      <td>22</td>\n",
       "      <td>26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">女生人数</th>\n",
       "      <th>一楼</th>\n",
       "      <td>30</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>二楼</th>\n",
       "      <td>24</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         A教室  B教室\n",
       "男生人数 一楼   26   20\n",
       "     二楼   22   26\n",
       "女生人数 一楼   30   25\n",
       "     二楼   24   20"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "df = pd.DataFrame(np.array([[26,20,22,26],[30,25,24,20]]),\n",
    "                  index=['男生人数', '女生人数'],\n",
    "                 columns=[['一楼', '一楼', '二楼', '二楼'],\n",
    "                         ['A教室', 'B教室','A教室','B教室']])\n",
    "print(df)\n",
    "df.stack(level=0)    #旋转外层索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "2e40dc2b-a767-48cd-b981-e5f2ef95e1c8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "楼层      一楼          二楼      \n",
      "教室     A教室   B教室   A教室   B教室\n",
      "性别                          \n",
      "女生人数  26.0  20.0  22.0  26.0\n",
      "男生人数  30.0  25.0  24.0  20.0\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "#需要转换为长格式\n",
    "data = {\n",
    "    '楼层': ['一楼','一楼','二楼','二楼','一楼','一楼','二楼','二楼'],\n",
    "    '教室': ['A教室','B教室','A教室','B教室','A教室','B教室','A教室','B教室'],\n",
    "    '性别': ['女生人数','女生人数','女生人数','女生人数','男生人数','男生人数','男生人数','男生人数'],\n",
    "    '人数': [26, 20, 22, 26, 30, 25, 24, 20]\n",
    "}\n",
    "df_long = pd.DataFrame(data)\n",
    "\n",
    "#使用pivot_table()函数重塑数据\n",
    "df_pivot = df_long.pivot_table(index='性别', columns=['楼层', '教室'], values='人数')\n",
    "print(df_pivot)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5cc69f4f-19ec-4528-99a6-0ac7b2ba3bef",
   "metadata": {},
   "source": [
    "### 轴向旋转\n",
    "- 在Pandas中"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "c99a2c7d-bd42-48d6-a7df-83aa63d78fc2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>商品名称</th>\n",
       "      <th>OPPO A1</th>\n",
       "      <th>小米6x</th>\n",
       "      <th>荣耀9青春版</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>出售日期</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2017年5月25日</th>\n",
       "      <td>1399元</td>\n",
       "      <td>1399元</td>\n",
       "      <td>999元</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2017年6月18日</th>\n",
       "      <td>1250元</td>\n",
       "      <td>1200元</td>\n",
       "      <td>800元</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "商品名称       OPPO A1   小米6x 荣耀9青春版\n",
       "出售日期                            \n",
       "2017年5月25日   1399元  1399元   999元\n",
       "2017年6月18日   1250元  1200元   800元"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df = pd.DataFrame({'商品名称': ['荣耀9青春版', '小米6x', 'OPPO A1', '荣耀9青春版', '小米6x', 'OPPO A1'],\n",
    "                  '出售日期': ['2017年5月25日', '2017年5月25日', '2017年5月25日', '2017年6月18日', '2017年6月18日', '2017年6月18日'],\n",
    "                  '价格': ['999元', '1399元', '1399元', '800元', '1200元', '1250元']})\n",
    "df.pivot(index='出售日期', columns='商品名称', values='价格')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2443aafe-8afe-41d9-af3f-af3d1967ef39",
   "metadata": {},
   "source": [
    "在数据重塑中，三个函数需要记住：【unstack()、povit_table()、pivot()】"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c8becbf7-18f6-4ea8-a970-7e6594df64a8",
   "metadata": {},
   "source": [
    "## 数据转换\n",
    "- 打那个数据经过清洗之后，这些数据并不能直接拿来进行分析建模，所以为了进一步对数据进行分析，需要对数据进行一些合理的\n",
    "- pandas中提供了一个rename()方法来重命名个别列索引或行索引的名称\n",
    "- rename(mapper=None, index=None, columns=None, axis=None, copy=True, inplace=False, level=None)\n",
    "- index, columns: 表示转换的行索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "6f1b8463-434a-4f2a-8311-a2749f09bac6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    A   B   C\n",
      "0  A0  B0  C0\n",
      "1  A1  B1  C1\n",
      "2  A2  B2  C2\n",
      "3  A3  B3  C3\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>壹</th>\n",
       "      <th>贰</th>\n",
       "      <th>叁</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>C0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>C1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "      <td>C3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    壹   贰   叁\n",
       "0  A0  B0  C0\n",
       "1  A1  B1  C1\n",
       "2  A2  B2  C2\n",
       "3  A3  B3  C3"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],\n",
    "                   'B': ['B0', 'B1', 'B2', 'B3'],\n",
    "                   'C': ['C0', 'C1', 'C2', 'C3']})\n",
    "print(df)\n",
    "#重命名列索引的名称，并且在原有的数据上进行修改\n",
    "df.rename(columns={'A':'壹', 'B':'贰', 'C':'叁'}, inplace=True)\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6c20e88f-5cba-4927-9580-dd56bc17ada2",
   "metadata": {},
   "source": [
    "上述案例中，创建了一个4行3列的DataFrame对象df,其列索引名称为A、B、C，然后调用rename()直接将df对象的每个列索引名称重命名"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ec7f096c-2fe9-40ef-8fd1-2be89cbcfc64",
   "metadata": {},
   "source": [
    "还可以使用str"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "3d6b22ad-ebae-4c90-b182-89c153e166f1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    A   B   C\n",
      "0  A0  B0  C0\n",
      "1  A1  B1  C1\n",
      "2  A2  B2  C2\n",
      "3  A3  B3  C3\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>C0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>C1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "      <td>C3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    a   b   c\n",
       "0  A0  B0  C0\n",
       "1  A1  B1  C1\n",
       "2  A2  B2  C2\n",
       "3  A3  B3  C3"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],\n",
    "                   'B': ['B0', 'B1', 'B2', 'B3'],\n",
    "                   'C': ['C0', 'C1', 'C2', 'C3']})\n",
    "print(df)\n",
    "df.rename(str.lower, axis='columns')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3f91e241-2ebb-441b-884a-ec847981af6d",
   "metadata": {},
   "source": [
    "还可以通过rename()对行索引进行重命名"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "77b7fca3-a5b0-4e70-ad49-3b69e967309e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    A   B   C\n",
      "0  A0  B0  C0\n",
      "1  A1  B1  C1\n",
      "2  A2  B2  C2\n",
      "3  A3  B3  C3\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>C0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>第二行</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>C1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>第三行</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "      <td>C3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      A   B   C\n",
       "0    A0  B0  C0\n",
       "第二行  A1  B1  C1\n",
       "第三行  A2  B2  C2\n",
       "3    A3  B3  C3"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],\n",
    "                   'B': ['B0', 'B1', 'B2', 'B3'],\n",
    "                   'C': ['C0', 'C1', 'C2', 'C3']})\n",
    "print(df)\n",
    "df.rename(index={1:'第二行', 2:'第三行'}, inplace=True)\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "950319f7-0f8a-4fa3-800f-e01170a2f46c",
   "metadata": {},
   "source": [
    "### 离散化操作\n",
    "- pandas中的cut()\n",
    "- pandas.cut(x, bins, right=True, labels=None, retbins=False, precision=3, include_lowest=False, duplicates='raise')\n",
    "- x: 表示要分箱的数组，必须是一维的\n",
    "- bins: 接收int和序列类型的数据，如果传入的是int类型的值，则表示在x范围内的等宽单元的数量（划分多少个等间距的区间）；如果传入的是一个序列，则表示将x划分在指定的序列中，若x不在此序列，则为NaN\n",
    "- right: 是否包含右端点，决定区间的开闭，默认为True\n",
    "- labels: 用于生成区间的标签\n",
    "- retbins: 是否返回bin\n",
    "- precision: 精度，默认保留3位小数\n",
    "- include_lowest: 默认包含左端点\n",
    "- duplicates:\n",
    "- cut()函数会返回一个Cateorical对象，我们可以将其看作一组表示面元名称的字符串，它包含了分组的数量以及不同分类的名称"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "49dffafa-3da6-4752-aca8-2756a06bd348",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(18, 25], (18, 25], (18, 25], (25, 35], (18, 25], ..., (35, 60], (25, 35], (60, 100], (35, 60], (25, 35]]\n",
       "Length: 11\n",
       "Categories (5, interval[int64, right]): [(0, 18] < (18, 25] < (25, 35] < (35, 60] < (60, 100]]"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "#使用pandas的cut()划分年龄组\n",
    "ages = [20, 22, 25, 27, 21, 23, 37, 31, 61, 45, 32]\n",
    "bins = [0, 18, 25, 35, 60, 100]\n",
    "cuts = pd.cut(ages, bins)\n",
    "cuts"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "10041145-a243-4cc6-8945-c24c8eba3589",
   "metadata": {},
   "source": [
    "上述代码中，定义了表示"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "f395494a-4524-44ef-b0e1-5a2d19a941a9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[[18, 25), [18, 25), [25, 35), [25, 35), [18, 25), ..., [35, 60), [25, 35), [60, 100), [35, 60), [25, 35)]\n",
       "Length: 11\n",
       "Categories (5, interval[int64, left]): [[0, 18) < [18, 25) < [25, 35) < [35, 60) < [60, 100)]"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.cut(ages, bins=bins, right=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "16b838d5-1943-463b-8f4c-93f02629a82f",
   "metadata": {},
   "source": [
    "### 哑变量处理类型数据\n",
    "- "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "5ec4df14-4efa-465d-b0e8-b2d32dc7808c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>col__司机</th>\n",
       "      <th>col__学生</th>\n",
       "      <th>col__导游</th>\n",
       "      <th>col__工人</th>\n",
       "      <th>col__教师</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   col__司机  col__学生  col__导游  col__工人  col__教师\n",
       "0    False    False    False     True    False\n",
       "1    False     True    False    False    False\n",
       "2     True    False    False    False    False\n",
       "3    False    False    False    False     True\n",
       "4    False    False     True    False    False"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df1 = pd.DataFrame({'职业': ['工人', '学生', '司机', '教师', '导游']})\n",
    "pd.get_dummies(df1, prefix=['col_'])\n",
    "#哑变量处理，将数据变成哑变量矩阵，每个特征数据（如学生）为单独一列，\n",
    "#通过prefix参数给每个列名前缀添加col_,并用 _ 进行链接，使其变为col_教师...\n",
    "#通过结果可以看出 True=1, False=0"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4d7aa430-b0f4-48c6-9d55-0889496b04f3",
   "metadata": {},
   "source": [
    "#### 案例--预处理地区信息数据\n",
    "- 1.检查重复数据\n",
    "- 2."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "7c220c2b-f061-43f0-8a84-f983f649875a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>省级单位</th>\n",
       "      <th>地级单位</th>\n",
       "      <th>县级单位</th>\n",
       "      <th>区划类型</th>\n",
       "      <th>行政面积（K㎡）</th>\n",
       "      <th>户籍人口（万人）</th>\n",
       "      <th>男性</th>\n",
       "      <th>女性</th>\n",
       "      <th>GDP（亿元）</th>\n",
       "      <th>常住人口（万人）</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>西城区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>51</td>\n",
       "      <td>146.47</td>\n",
       "      <td>72.88</td>\n",
       "      <td>73.59</td>\n",
       "      <td>3602.36</td>\n",
       "      <td>125.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>东城区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>42</td>\n",
       "      <td>97.41</td>\n",
       "      <td>47.91</td>\n",
       "      <td>49.50</td>\n",
       "      <td>2061.80</td>\n",
       "      <td>87.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>丰台区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>306</td>\n",
       "      <td>115.33</td>\n",
       "      <td>58.39</td>\n",
       "      <td>56.95</td>\n",
       "      <td>1297.03</td>\n",
       "      <td>225.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>西城区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>51</td>\n",
       "      <td>146.47</td>\n",
       "      <td>72.88</td>\n",
       "      <td>73.59</td>\n",
       "      <td>3602.36</td>\n",
       "      <td>125.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>朝阳区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>455</td>\n",
       "      <td>210.91</td>\n",
       "      <td>105.43</td>\n",
       "      <td>105.48</td>\n",
       "      <td>5171.03</td>\n",
       "      <td>385.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>房山区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1990</td>\n",
       "      <td>81.28</td>\n",
       "      <td>40.76</td>\n",
       "      <td>40.52</td>\n",
       "      <td>606.61</td>\n",
       "      <td>109.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>丰台区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>306</td>\n",
       "      <td>115.33</td>\n",
       "      <td>58.39</td>\n",
       "      <td>56.95</td>\n",
       "      <td>1297.03</td>\n",
       "      <td>225.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>石景山区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>84</td>\n",
       "      <td>38.69</td>\n",
       "      <td>19.87</td>\n",
       "      <td>18.82</td>\n",
       "      <td>482.14</td>\n",
       "      <td>63.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>海淀区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>431</td>\n",
       "      <td>240.20</td>\n",
       "      <td>120.08</td>\n",
       "      <td>120.12</td>\n",
       "      <td>5395.16</td>\n",
       "      <td>359.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>房山区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1990</td>\n",
       "      <td>81.28</td>\n",
       "      <td>40.76</td>\n",
       "      <td>40.52</td>\n",
       "      <td>606.61</td>\n",
       "      <td>109.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>通州区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>906</td>\n",
       "      <td>74.68</td>\n",
       "      <td>37.08</td>\n",
       "      <td>37.60</td>\n",
       "      <td>674.81</td>\n",
       "      <td>142.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>顺义区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1020</td>\n",
       "      <td>62.74</td>\n",
       "      <td>31.12</td>\n",
       "      <td>31.61</td>\n",
       "      <td>1591.60</td>\n",
       "      <td>107.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>昌平区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1344</td>\n",
       "      <td>61.14</td>\n",
       "      <td>30.72</td>\n",
       "      <td>30.41</td>\n",
       "      <td>753.39</td>\n",
       "      <td>201.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>大兴区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1036</td>\n",
       "      <td>68.38</td>\n",
       "      <td>34.02</td>\n",
       "      <td>34.36</td>\n",
       "      <td>1796.95</td>\n",
       "      <td>169.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>门头沟区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1451</td>\n",
       "      <td>25.12</td>\n",
       "      <td>12.80</td>\n",
       "      <td>12.32</td>\n",
       "      <td>157.86</td>\n",
       "      <td>31.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>怀柔区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>2123</td>\n",
       "      <td>28.29</td>\n",
       "      <td>14.13</td>\n",
       "      <td>14.16</td>\n",
       "      <td>259.41</td>\n",
       "      <td>39.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>平谷区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>950</td>\n",
       "      <td>40.20</td>\n",
       "      <td>20.22</td>\n",
       "      <td>19.98</td>\n",
       "      <td>218.31</td>\n",
       "      <td>43.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>密云区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>2229</td>\n",
       "      <td>43.59</td>\n",
       "      <td>21.77</td>\n",
       "      <td>21.82</td>\n",
       "      <td>251.13</td>\n",
       "      <td>48.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>延庆区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1994</td>\n",
       "      <td>28.42</td>\n",
       "      <td>14.32</td>\n",
       "      <td>14.11</td>\n",
       "      <td>122.66</td>\n",
       "      <td>32.7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   省级单位 地级单位  县级单位 区划类型  行政面积（K㎡）  户籍人口（万人）      男性      女性  GDP（亿元）  常住人口（万人）\n",
       "0    北京   北京   西城区  市辖区        51    146.47   72.88   73.59  3602.36     125.9\n",
       "1    北京   北京   东城区  市辖区        42     97.41   47.91   49.50  2061.80      87.8\n",
       "2    北京   北京   丰台区  市辖区       306    115.33   58.39   56.95  1297.03     225.5\n",
       "3    北京   北京   西城区  市辖区        51    146.47   72.88   73.59  3602.36     125.9\n",
       "4    北京   北京   朝阳区  市辖区       455    210.91  105.43  105.48  5171.03     385.6\n",
       "5    北京   北京   房山区  市辖区      1990     81.28   40.76   40.52   606.61     109.6\n",
       "6    北京   北京   丰台区  市辖区       306    115.33   58.39   56.95  1297.03     225.5\n",
       "7    北京   北京  石景山区  市辖区        84     38.69   19.87   18.82   482.14      63.4\n",
       "8    北京   北京   海淀区  市辖区       431    240.20  120.08  120.12  5395.16     359.3\n",
       "9    北京   北京   房山区  市辖区      1990     81.28   40.76   40.52   606.61     109.6\n",
       "10   北京   北京   通州区  市辖区       906     74.68   37.08   37.60   674.81     142.8\n",
       "11   北京   北京   顺义区  市辖区      1020     62.74   31.12   31.61  1591.60     107.5\n",
       "12   北京   北京   昌平区  市辖区      1344     61.14   30.72   30.41   753.39     201.0\n",
       "13   北京   北京   大兴区  市辖区      1036     68.38   34.02   34.36  1796.95     169.4\n",
       "14   北京   北京  门头沟区  市辖区      1451     25.12   12.80   12.32   157.86      31.1\n",
       "15   北京   北京   怀柔区  市辖区      2123     28.29   14.13   14.16   259.41      39.3\n",
       "16   北京   北京   平谷区  市辖区       950     40.20   20.22   19.98   218.31      43.7\n",
       "17   北京   北京   密云区  市辖区      2229     43.59   21.77   21.82   251.13      48.3\n",
       "18   北京   北京   延庆区  市辖区      1994     28.42   14.32   14.11   122.66      32.7"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 读取北京市地区信息\n",
    "file_path_bj = open('./data/北京地区信息.csv')\n",
    "file_data_bjinfo = pd.read_csv(file_path_bj)\n",
    "file_data_bjinfo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "3f7eadd4-4c43-415e-bccb-30a1e1ff3a27",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>省级单位</th>\n",
       "      <th>地级单位</th>\n",
       "      <th>县级单位</th>\n",
       "      <th>区划类型</th>\n",
       "      <th>行政面积（K㎡）</th>\n",
       "      <th>户籍人口（万人）</th>\n",
       "      <th>男性</th>\n",
       "      <th>女性</th>\n",
       "      <th>GDP（亿元）</th>\n",
       "      <th>常住人口（万人）</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>和平区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>10</td>\n",
       "      <td>42.32</td>\n",
       "      <td>20.37</td>\n",
       "      <td>21.95</td>\n",
       "      <td>802.62</td>\n",
       "      <td>35.19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>河东区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>39</td>\n",
       "      <td>75.79</td>\n",
       "      <td>38.06</td>\n",
       "      <td>37.73</td>\n",
       "      <td>290.98</td>\n",
       "      <td>97.61</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>河西区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>37</td>\n",
       "      <td>83.20</td>\n",
       "      <td>40.83</td>\n",
       "      <td>42.37</td>\n",
       "      <td>819.85</td>\n",
       "      <td>99.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>南开区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>39</td>\n",
       "      <td>87.28</td>\n",
       "      <td>43.30</td>\n",
       "      <td>43.98</td>\n",
       "      <td>652.09</td>\n",
       "      <td>114.55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>河北区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>27</td>\n",
       "      <td>63.42</td>\n",
       "      <td>31.86</td>\n",
       "      <td>31.56</td>\n",
       "      <td>415.67</td>\n",
       "      <td>89.24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>红桥区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>21</td>\n",
       "      <td>51.66</td>\n",
       "      <td>25.93</td>\n",
       "      <td>25.73</td>\n",
       "      <td>208.16</td>\n",
       "      <td>56.69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>东丽区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>460</td>\n",
       "      <td>37.70</td>\n",
       "      <td>18.83</td>\n",
       "      <td>18.87</td>\n",
       "      <td>927.08</td>\n",
       "      <td>76.04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>西青区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>545</td>\n",
       "      <td>14.85</td>\n",
       "      <td>19.85</td>\n",
       "      <td>20.38</td>\n",
       "      <td>1040.27</td>\n",
       "      <td>85.37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>津南区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>401</td>\n",
       "      <td>44.83</td>\n",
       "      <td>22.35</td>\n",
       "      <td>22.48</td>\n",
       "      <td>810.16</td>\n",
       "      <td>89.41</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>北辰区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>478</td>\n",
       "      <td>40.39</td>\n",
       "      <td>20.09</td>\n",
       "      <td>20.30</td>\n",
       "      <td>1058.14</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>武清区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1570</td>\n",
       "      <td>92.27</td>\n",
       "      <td>45.86</td>\n",
       "      <td>46.41</td>\n",
       "      <td>1151.65</td>\n",
       "      <td>119.96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>宝坻区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1523</td>\n",
       "      <td>71.10</td>\n",
       "      <td>35.72</td>\n",
       "      <td>35.39</td>\n",
       "      <td>684.07</td>\n",
       "      <td>92.98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>滨海新区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>2270</td>\n",
       "      <td>128.18</td>\n",
       "      <td>66.04</td>\n",
       "      <td>62.14</td>\n",
       "      <td>6654.00</td>\n",
       "      <td>299.42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>宁河区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1414</td>\n",
       "      <td>40.00</td>\n",
       "      <td>20.21</td>\n",
       "      <td>19.79</td>\n",
       "      <td>525.37</td>\n",
       "      <td>49.57</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>静海区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1476</td>\n",
       "      <td>59.79</td>\n",
       "      <td>30.35</td>\n",
       "      <td>29.44</td>\n",
       "      <td>667.83</td>\n",
       "      <td>79.29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>蓟州区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1593</td>\n",
       "      <td>86.24</td>\n",
       "      <td>43.86</td>\n",
       "      <td>42.38</td>\n",
       "      <td>392.55</td>\n",
       "      <td>91.15</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   省级单位 地级单位  县级单位 区划类型  行政面积（K㎡）  户籍人口（万人）     男性     女性  GDP（亿元）  常住人口（万人）\n",
       "0    天津   天津   和平区  市辖区        10     42.32  20.37  21.95   802.62     35.19\n",
       "1    天津   天津   河东区  市辖区        39     75.79  38.06  37.73   290.98     97.61\n",
       "2    天津   天津   河西区  市辖区        37     83.20  40.83  42.37   819.85     99.25\n",
       "3    天津   天津   南开区  市辖区        39     87.28  43.30  43.98   652.09    114.55\n",
       "4    天津   天津   河北区  市辖区        27     63.42  31.86  31.56   415.67     89.24\n",
       "5    天津   天津   红桥区  市辖区        21     51.66  25.93  25.73   208.16     56.69\n",
       "6    天津   天津   东丽区  市辖区       460     37.70  18.83  18.87   927.08     76.04\n",
       "7    天津   天津   西青区  市辖区       545     14.85  19.85  20.38  1040.27     85.37\n",
       "8    天津   天津   津南区  市辖区       401     44.83  22.35  22.48   810.16     89.41\n",
       "9    天津   天津   北辰区  市辖区       478     40.39  20.09  20.30  1058.14       NaN\n",
       "10   天津   天津   武清区  市辖区      1570     92.27  45.86  46.41  1151.65    119.96\n",
       "11   天津   天津   宝坻区  市辖区      1523     71.10  35.72  35.39   684.07     92.98\n",
       "12   天津   天津  滨海新区  市辖区      2270    128.18  66.04  62.14  6654.00    299.42\n",
       "13   天津   天津   宁河区  市辖区      1414     40.00  20.21  19.79   525.37     49.57\n",
       "14   天津   天津   静海区  市辖区      1476     59.79  30.35  29.44   667.83     79.29\n",
       "15   天津   天津   蓟州区  市辖区      1593     86.24  43.86  42.38   392.55     91.15"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the Tianjin district information table.\n",
    "# The CSV is opened explicitly and the file object is handed to pandas;\n",
    "# the `with` block closes the handle as soon as parsing finishes, instead of\n",
    "# leaving it open for the lifetime of the kernel.\n",
    "with open('./data/天津地区信息.csv') as file_path_tj:\n",
    "    file_data_tjinfo = pd.read_csv(file_path_tj)\n",
    "file_data_tjinfo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "b4dcc0cd-008e-4a25-aacc-7e6e988fc1ea",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     False\n",
       "1     False\n",
       "2     False\n",
       "3      True\n",
       "4     False\n",
       "5     False\n",
       "6      True\n",
       "7     False\n",
       "8     False\n",
       "9      True\n",
       "10    False\n",
       "11    False\n",
       "12    False\n",
       "13    False\n",
       "14    False\n",
       "15    False\n",
       "16    False\n",
       "17    False\n",
       "18    False\n",
       "dtype: bool"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Check file_data_bjinfo for duplicated rows: True marks a row that repeats an earlier row\n",
    "file_data_bjinfo.duplicated()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "3d7434df-e12b-4145-bccf-6a70f6d078d6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     False\n",
       "1     False\n",
       "2     False\n",
       "3     False\n",
       "4     False\n",
       "5     False\n",
       "6     False\n",
       "7     False\n",
       "8     False\n",
       "9     False\n",
       "10    False\n",
       "11    False\n",
       "12    False\n",
       "13    False\n",
       "14    False\n",
       "15    False\n",
       "dtype: bool"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Check file_data_tjinfo for duplicated rows: True marks a row that repeats an earlier row\n",
    "file_data_tjinfo.duplicated()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6fa0179a-197b-4cc7-90e9-194d8cea886c",
   "metadata": {},
   "source": [
    "通过两次的输出结果可以看出，file_data_bjinfo中，索引3、6、9对应的值为True, 表明这几行数据都是重复的。而file_data_tjinfo中没有重复的数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "795ff79e-1dc0-4a42-a89e-cf3f8872f96b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>省级单位</th>\n",
       "      <th>地级单位</th>\n",
       "      <th>县级单位</th>\n",
       "      <th>区划类型</th>\n",
       "      <th>行政面积（K㎡）</th>\n",
       "      <th>户籍人口（万人）</th>\n",
       "      <th>男性</th>\n",
       "      <th>女性</th>\n",
       "      <th>GDP（亿元）</th>\n",
       "      <th>常住人口（万人）</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>西城区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>51</td>\n",
       "      <td>146.47</td>\n",
       "      <td>72.88</td>\n",
       "      <td>73.59</td>\n",
       "      <td>3602.36</td>\n",
       "      <td>125.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>东城区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>42</td>\n",
       "      <td>97.41</td>\n",
       "      <td>47.91</td>\n",
       "      <td>49.50</td>\n",
       "      <td>2061.80</td>\n",
       "      <td>87.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>丰台区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>306</td>\n",
       "      <td>115.33</td>\n",
       "      <td>58.39</td>\n",
       "      <td>56.95</td>\n",
       "      <td>1297.03</td>\n",
       "      <td>225.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>朝阳区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>455</td>\n",
       "      <td>210.91</td>\n",
       "      <td>105.43</td>\n",
       "      <td>105.48</td>\n",
       "      <td>5171.03</td>\n",
       "      <td>385.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>房山区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1990</td>\n",
       "      <td>81.28</td>\n",
       "      <td>40.76</td>\n",
       "      <td>40.52</td>\n",
       "      <td>606.61</td>\n",
       "      <td>109.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>石景山区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>84</td>\n",
       "      <td>38.69</td>\n",
       "      <td>19.87</td>\n",
       "      <td>18.82</td>\n",
       "      <td>482.14</td>\n",
       "      <td>63.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>海淀区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>431</td>\n",
       "      <td>240.20</td>\n",
       "      <td>120.08</td>\n",
       "      <td>120.12</td>\n",
       "      <td>5395.16</td>\n",
       "      <td>359.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>通州区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>906</td>\n",
       "      <td>74.68</td>\n",
       "      <td>37.08</td>\n",
       "      <td>37.60</td>\n",
       "      <td>674.81</td>\n",
       "      <td>142.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>顺义区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1020</td>\n",
       "      <td>62.74</td>\n",
       "      <td>31.12</td>\n",
       "      <td>31.61</td>\n",
       "      <td>1591.60</td>\n",
       "      <td>107.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>昌平区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1344</td>\n",
       "      <td>61.14</td>\n",
       "      <td>30.72</td>\n",
       "      <td>30.41</td>\n",
       "      <td>753.39</td>\n",
       "      <td>201.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>大兴区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1036</td>\n",
       "      <td>68.38</td>\n",
       "      <td>34.02</td>\n",
       "      <td>34.36</td>\n",
       "      <td>1796.95</td>\n",
       "      <td>169.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>门头沟区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1451</td>\n",
       "      <td>25.12</td>\n",
       "      <td>12.80</td>\n",
       "      <td>12.32</td>\n",
       "      <td>157.86</td>\n",
       "      <td>31.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>怀柔区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>2123</td>\n",
       "      <td>28.29</td>\n",
       "      <td>14.13</td>\n",
       "      <td>14.16</td>\n",
       "      <td>259.41</td>\n",
       "      <td>39.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>平谷区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>950</td>\n",
       "      <td>40.20</td>\n",
       "      <td>20.22</td>\n",
       "      <td>19.98</td>\n",
       "      <td>218.31</td>\n",
       "      <td>43.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>密云区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>2229</td>\n",
       "      <td>43.59</td>\n",
       "      <td>21.77</td>\n",
       "      <td>21.82</td>\n",
       "      <td>251.13</td>\n",
       "      <td>48.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>延庆区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1994</td>\n",
       "      <td>28.42</td>\n",
       "      <td>14.32</td>\n",
       "      <td>14.11</td>\n",
       "      <td>122.66</td>\n",
       "      <td>32.7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   省级单位 地级单位  县级单位 区划类型  行政面积（K㎡）  户籍人口（万人）      男性      女性  GDP（亿元）  常住人口（万人）\n",
       "0    北京   北京   西城区  市辖区        51    146.47   72.88   73.59  3602.36     125.9\n",
       "1    北京   北京   东城区  市辖区        42     97.41   47.91   49.50  2061.80      87.8\n",
       "2    北京   北京   丰台区  市辖区       306    115.33   58.39   56.95  1297.03     225.5\n",
       "4    北京   北京   朝阳区  市辖区       455    210.91  105.43  105.48  5171.03     385.6\n",
       "5    北京   北京   房山区  市辖区      1990     81.28   40.76   40.52   606.61     109.6\n",
       "7    北京   北京  石景山区  市辖区        84     38.69   19.87   18.82   482.14      63.4\n",
       "8    北京   北京   海淀区  市辖区       431    240.20  120.08  120.12  5395.16     359.3\n",
       "10   北京   北京   通州区  市辖区       906     74.68   37.08   37.60   674.81     142.8\n",
       "11   北京   北京   顺义区  市辖区      1020     62.74   31.12   31.61  1591.60     107.5\n",
       "12   北京   北京   昌平区  市辖区      1344     61.14   30.72   30.41   753.39     201.0\n",
       "13   北京   北京   大兴区  市辖区      1036     68.38   34.02   34.36  1796.95     169.4\n",
       "14   北京   北京  门头沟区  市辖区      1451     25.12   12.80   12.32   157.86      31.1\n",
       "15   北京   北京   怀柔区  市辖区      2123     28.29   14.13   14.16   259.41      39.3\n",
       "16   北京   北京   平谷区  市辖区       950     40.20   20.22   19.98   218.31      43.7\n",
       "17   北京   北京   密云区  市辖区      2229     43.59   21.77   21.82   251.13      48.3\n",
       "18   北京   北京   延庆区  市辖区      1994     28.42   14.32   14.11   122.66      32.7"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Beijing data: remove the duplicated rows (the surviving rows keep their original index labels)\n",
    "file_data_bjinfo = file_data_bjinfo.drop_duplicates()\n",
    "file_data_bjinfo"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b0133ad3-9b9f-4b01-a68b-11dae7876dd3",
   "metadata": {},
   "source": [
    "2.缺失值的检查和处理\n",
    "- isnull(), 如果返回结果有True, 则数据中存在缺失数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "0571854a-a7f8-48f9-8601-af9adcda435e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>省级单位</th>\n",
       "      <th>地级单位</th>\n",
       "      <th>县级单位</th>\n",
       "      <th>区划类型</th>\n",
       "      <th>行政面积（K㎡）</th>\n",
       "      <th>户籍人口（万人）</th>\n",
       "      <th>男性</th>\n",
       "      <th>女性</th>\n",
       "      <th>GDP（亿元）</th>\n",
       "      <th>常住人口（万人）</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     省级单位   地级单位   县级单位   区划类型  行政面积（K㎡）  户籍人口（万人）     男性     女性  GDP（亿元）  常住人口（万人）\n",
       "0   False  False  False  False     False     False  False  False    False     False\n",
       "1   False  False  False  False     False     False  False  False    False     False\n",
       "2   False  False  False  False     False     False  False  False    False     False\n",
       "3   False  False  False  False     False     False  False  False    False     False\n",
       "4   False  False  False  False     False     False  False  False    False     False\n",
       "5   False  False  False  False     False     False  False  False    False     False\n",
       "6   False  False  False  False     False     False  False  False    False     False\n",
       "7   False  False  False  False     False     False  False  False    False     False\n",
       "8   False  False  False  False     False     False  False  False    False     False\n",
       "9   False  False  False  False     False     False  False  False    False      True\n",
       "10  False  False  False  False     False     False  False  False    False     False\n",
       "11  False  False  False  False     False     False  False  False    False     False\n",
       "12  False  False  False  False     False     False  False  False    False     False\n",
       "13  False  False  False  False     False     False  False  False    False     False\n",
       "14  False  False  False  False     False     False  False  False    False     False\n",
       "15  False  False  False  False     False     False  False  False    False     False"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "file_data_tjinfo.isnull()    # check for missing data: True marks a missing cell"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dbd1d8ae-5ecd-402e-94fa-8cd6fa34ab8b",
   "metadata": {},
   "source": [
    "- 缺失数据的处理方式有删除数据、数据补齐、暂不处理三种，如果采用暂不处理的方式，则会影响数据的完整性。因此这里采用数据补齐的方式来处理缺失数据。\n",
    "- 我们使用平均值填充缺失数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "488463ef-ac2f-4be6-af3f-f75e0d840fc2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>省级单位</th>\n",
       "      <th>地级单位</th>\n",
       "      <th>县级单位</th>\n",
       "      <th>区划类型</th>\n",
       "      <th>行政面积（K㎡）</th>\n",
       "      <th>户籍人口（万人）</th>\n",
       "      <th>男性</th>\n",
       "      <th>女性</th>\n",
       "      <th>GDP（亿元）</th>\n",
       "      <th>常住人口（万人）</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>和平区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>10</td>\n",
       "      <td>42.32</td>\n",
       "      <td>20.37</td>\n",
       "      <td>21.95</td>\n",
       "      <td>802.62</td>\n",
       "      <td>35.19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>河东区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>39</td>\n",
       "      <td>75.79</td>\n",
       "      <td>38.06</td>\n",
       "      <td>37.73</td>\n",
       "      <td>290.98</td>\n",
       "      <td>97.61</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>河西区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>37</td>\n",
       "      <td>83.20</td>\n",
       "      <td>40.83</td>\n",
       "      <td>42.37</td>\n",
       "      <td>819.85</td>\n",
       "      <td>99.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>南开区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>39</td>\n",
       "      <td>87.28</td>\n",
       "      <td>43.30</td>\n",
       "      <td>43.98</td>\n",
       "      <td>652.09</td>\n",
       "      <td>114.55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>河北区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>27</td>\n",
       "      <td>63.42</td>\n",
       "      <td>31.86</td>\n",
       "      <td>31.56</td>\n",
       "      <td>415.67</td>\n",
       "      <td>89.24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>红桥区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>21</td>\n",
       "      <td>51.66</td>\n",
       "      <td>25.93</td>\n",
       "      <td>25.73</td>\n",
       "      <td>208.16</td>\n",
       "      <td>56.69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>东丽区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>460</td>\n",
       "      <td>37.70</td>\n",
       "      <td>18.83</td>\n",
       "      <td>18.87</td>\n",
       "      <td>927.08</td>\n",
       "      <td>76.04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>西青区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>545</td>\n",
       "      <td>14.85</td>\n",
       "      <td>19.85</td>\n",
       "      <td>20.38</td>\n",
       "      <td>1040.27</td>\n",
       "      <td>85.37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>津南区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>401</td>\n",
       "      <td>44.83</td>\n",
       "      <td>22.35</td>\n",
       "      <td>22.48</td>\n",
       "      <td>810.16</td>\n",
       "      <td>89.41</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>北辰区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>478</td>\n",
       "      <td>40.39</td>\n",
       "      <td>20.09</td>\n",
       "      <td>20.30</td>\n",
       "      <td>1058.14</td>\n",
       "      <td>98.38</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>武清区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1570</td>\n",
       "      <td>92.27</td>\n",
       "      <td>45.86</td>\n",
       "      <td>46.41</td>\n",
       "      <td>1151.65</td>\n",
       "      <td>119.96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>宝坻区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1523</td>\n",
       "      <td>71.10</td>\n",
       "      <td>35.72</td>\n",
       "      <td>35.39</td>\n",
       "      <td>684.07</td>\n",
       "      <td>92.98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>滨海新区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>2270</td>\n",
       "      <td>128.18</td>\n",
       "      <td>66.04</td>\n",
       "      <td>62.14</td>\n",
       "      <td>6654.00</td>\n",
       "      <td>299.42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>宁河区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1414</td>\n",
       "      <td>40.00</td>\n",
       "      <td>20.21</td>\n",
       "      <td>19.79</td>\n",
       "      <td>525.37</td>\n",
       "      <td>49.57</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>静海区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1476</td>\n",
       "      <td>59.79</td>\n",
       "      <td>30.35</td>\n",
       "      <td>29.44</td>\n",
       "      <td>667.83</td>\n",
       "      <td>79.29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>蓟州区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1593</td>\n",
       "      <td>86.24</td>\n",
       "      <td>43.86</td>\n",
       "      <td>42.38</td>\n",
       "      <td>392.55</td>\n",
       "      <td>91.15</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   省级单位 地级单位  县级单位 区划类型  行政面积（K㎡）  户籍人口（万人）     男性     女性  GDP（亿元）  常住人口（万人）\n",
       "0    天津   天津   和平区  市辖区        10     42.32  20.37  21.95   802.62     35.19\n",
       "1    天津   天津   河东区  市辖区        39     75.79  38.06  37.73   290.98     97.61\n",
       "2    天津   天津   河西区  市辖区        37     83.20  40.83  42.37   819.85     99.25\n",
       "3    天津   天津   南开区  市辖区        39     87.28  43.30  43.98   652.09    114.55\n",
       "4    天津   天津   河北区  市辖区        27     63.42  31.86  31.56   415.67     89.24\n",
       "5    天津   天津   红桥区  市辖区        21     51.66  25.93  25.73   208.16     56.69\n",
       "6    天津   天津   东丽区  市辖区       460     37.70  18.83  18.87   927.08     76.04\n",
       "7    天津   天津   西青区  市辖区       545     14.85  19.85  20.38  1040.27     85.37\n",
       "8    天津   天津   津南区  市辖区       401     44.83  22.35  22.48   810.16     89.41\n",
       "9    天津   天津   北辰区  市辖区       478     40.39  20.09  20.30  1058.14     98.38\n",
       "10   天津   天津   武清区  市辖区      1570     92.27  45.86  46.41  1151.65    119.96\n",
       "11   天津   天津   宝坻区  市辖区      1523     71.10  35.72  35.39   684.07     92.98\n",
       "12   天津   天津  滨海新区  市辖区      2270    128.18  66.04  62.14  6654.00    299.42\n",
       "13   天津   天津   宁河区  市辖区      1414     40.00  20.21  19.79   525.37     49.57\n",
       "14   天津   天津   静海区  市辖区      1476     59.79  30.35  29.44   667.83     79.29\n",
       "15   天津   天津   蓟州区  市辖区      1593     86.24  43.86  42.38   392.55     91.15"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 计算常驻人口的平均数，设置为float类型并保留两位小数\n",
    "# .2f 保留两位小数\n",
    "population = float(\"{:.2f}\".format(file_data_tjinfo['常住人口（万人）'].mean()))\n",
    "#以字典映射的形式将需要填充的数据进行对应\n",
    "values = {'常住人口（万人）': population}\n",
    "file_data_tjinfo = file_data_tjinfo.fillna(value=values)\n",
    "file_data_tjinfo"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d6c1f0f0-844f-476c-a23b-91233a23a115",
   "metadata": {},
   "source": [
    "上述代码计算了“常住人口”一列的平均值。由于该列的数据类型为float且保留两位小数，所以这里先通过字符串格式化将平均值保留两位小数，再强制转换为float类型，最后通过fillna()将该平均值填充到缺失值所在的位置上。\n",
    "- 从上面的结果可以看出，之前的NaN值已经被计算出的平均值替代了。"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "082250ea-290f-4f66-b2e1-78f859253506",
   "metadata": {},
   "source": [
    "3. 异常值的检测和处理\n",
    "- 所有数据确保补充完整之后，便可以对它们进行异常值检测。检测异常值的方式有两种：基本检测（即3σ原则，又称拉依达准则）和箱型图。其中，3σ原则是指假设一组检测数据只含有随机误差，对其进行计算得出标准偏差，按一定的概率确定一个区间，认为凡是超过这个区间的误差，就不属于随机误差，而是粗大误差，含有该误差的数据应被删除。箱型图是一种用于显示一组数据分散情况资料的统计图，它主要包含6个数据节点：将一组数据从大到小排列后，分别计算出它们的上边缘、上四分位数、中位数、下四分位数、下边缘及异常值。\n",
    "- 由于箱型图表现异常值的方式更为直观，所以在此我们使用箱型图。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "e440ec52-08dd-4aa2-b376-24abd789f763",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAGcCAYAAAAlG4EeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAA+t0lEQVR4nO3df3hU1aH/+89kZpIQSCThpwkpPxX8BQo3mmKltqWAChZrb/ULasVTj/foFRFMLae2SkuBRyxWzym2sSoPUmttD9ijFaEXLXpKqcmJpoagRjRRfkOIyYTAEMK6f6R7nCQzycxkTyaz8349T57J7LVnzZo1e/Z8Zu1fLmOMEQAAgEOkJLoBAAAAdiLcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAAR/EkugHdcebMGe3fv1+ZmZlyuVyJbg4AAIiAMUY+n0+5ublKSbF/nCWpw83+/fuVn5+f6GYAAIAYfPrppxoxYoTt9SZ1uMnMzJTU2jlZWVkJbk1oBw4cUFlZmSZPnqyzzz470c1JavSlPehH+9CX9qEv7ZEs/djQ0KD8/PzA97jdkjrcWJuisrKyem24aWxsVEZGhjIzM3ttG5MFfWkP+tE+9KV96Et7JFs/xmuXEnYoBgAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAk1KlTp/Tkk0+quLhYTz75pE6dOpXoJiHJEW4AAAnzve99T/3799dDDz2kV155RQ899JD69++v733ve4luGpJYUl9bCgCQvL73ve9p9erVGjZsmJYsWaIhQ4boyJEj+tnPfqbVq1dLkh5++OEEtxLJiJEbAECPO3XqlB599FENGzZMe/fu1fz585Wdna358+dr7969GjZsmB599FE2USEmhBsAQI9bu3atTp8+reXLl8vjabsRwePx6Mc//rFOnz6ttWvXJqiFSGaEGwBAj9uzZ48kafbs2SHLrenWfEA0CDcAgB43duxYSdLLL78cstyabs0HRINwAwDocXfeeac8Ho8eeOABnT59uk3Z6dOn9aMf/Ugej0d33nlnglqIZEa4AQD0uNTUVN177706dOiQRowYoQ0bNujYsWPasGGDRowYoUOHDunee+9VampqopuKJMSh4ACAhLAO83700Ud1//33B6Z7PB4VFRVxGDhixsgNACBhHn74YR0/flwPPfSQrr76aj300EM6fvw4wQbdwsgNACChUlNTdfvtt+viiy9WQUEBm6LQbYzcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAAR4k53Hz/+9/XnDlzAvcrKipUUFCg7OxsFRUVyRjT7TIAAIBoxRRuKioqtHbtWv385z+XJPn9fs2ZM0dTpkxRaWmpKisrtW7dum6VAQAAxCLqcGOM0R133KFFixZp7NixkqTNmzervr5ea9as0dixY7VixQo99dRT3SoDAACIhSfaBzz55JN655139N3vflcvv/yyZs6cqfLychUWFiojI0OSNHHiRFVWVkpSzGWh+P1++f3+wP2GhgZJUnNzs5qbm6N9KT2ipaUlcNtb25gs6Et70I/2oS/tQ1/aI1n6Md5tiyrcNDY26oEHHtA555yjvXv36tlnn9VPf/pTTZ06VaNHjw7M53K55Ha7VVdXp4aGhpjKsrOzOzz/ypUrtWzZsg7Tt27dGghIvVVZWVmim+AY9KU96Ef70Jf2oS/t0dv7sampKa71RxVuNm7cqOPHj+u1115TTk6Oli5dqosuukhPP/20FixY0Gbe9PR0NTU1yePxKC0tLeqyUOFm6dKlWrx4ceB+
Q0OD8vPzNWPGDGVlZUXzUnrMwYMHVVZWpsmTJ2v48OGJbk5Soy/tQT/ah760D31pj2TpR2vLS7xEFW727t2ryy67TDk5Oa0P9ng0ceJEVVdX68iRI23m9fl8Sk1NVU5OjioqKqIuCyUtLa1DGJIkr9crr9cbzUvpMW63O3DbW9uYLOhLe9CP9qEv7UNf2iNZ+jHebYtqh+L8/HydOHGizbSamhr97Gc/086dOwPTqqur5ff7lZOTo4KCgpjKAAAAYhFVuLnmmmu0e/du/fKXv9TevXv1+OOP65133tGMGTNUX1+v9evXS5JWrVql6dOny+12a9q0aTGVAQAAxCKqzVI5OTl69dVXtWTJEi1evFjDhw/X888/r3Hjxqm4uFjz5s1TUVGRWlpatH379tYn8HhiKgMAAIhF1IeCFxYW6q9//WuH6XPnzlVVVZVKS0s1depUDRkypNtlAAAA0Yo63HQmLy9PeXl5tpYBAABEgwtnAgAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAARyHcAAAAR/EkugEAgL6tsbFRt912m9577z1NmDBBf/jDHzRgwIBENwtJjHADAEiYSy+9VCUlJYH7NTU1yszMVEFBgd56660EtgzJjM1SAICEsIKNy+XS9ddfr0cffVTXX3+9XC6XSkpKdOmllya6iUhShBsAQI9rbGwMBJumpiY9/vjjGj16tB5//HE1NTUFAk5jY2Oim4okRLgBAPS4m2++WZJ00003KT09vU1Zenq65s2b12Y+IBqEGwBAj9uzZ48k6b777gtZvnjx4jbzAdEg3AAAetzYsWMlSY888kjI8jVr1rSZD4gG4QYA0OOeffZZSdKGDRt08uTJNmUnT57Uc88912Y+IBqEGwBAjxswYIAKCgpkjFFGRobuvvtu7dmzR3fffbcyMjJkjFFBQQHnu0FMOM8NACAh3nrrrcDh4Bs3btTGjRsDZZznBt3ByA0AIGHeeust+Xw+zZw5UyNHjtTMmTPl8/kINugWwg0AAHAUNksBABKGyy8gHhi5AQAkBJdfQLwQbgAAPY7LLyCeCDcAgB7H5RcQT4QbAECP4/ILiCfCDQCgx3H5BcQT4QYA0OO4/ALiiXADAOhxXH4B8cR5bgAACcHlFxAvUY3c3H333XK5XIG/cePGSZIqKipUUFCg7OxsFRUVyRgTeEysZQAA5+PyC4iHqMLN//7v/+pPf/qT6urqVFdXp7ffflt+v19z5szRlClTVFpaqsrKSq1bt06SYi4DAPQdAwYM0NNPP63HHntMTz/9NJui0G0Rh5vTp0+roqJC06ZN08CBAzVw4EBlZmZq8+bNqq+v15o1azR27FitWLFCTz31lCTFXAYAABCriPe5+cc//iFjjC6++GLt27dPX/7yl1VcXKzy8nIVFhYqIyNDkjRx4kRVVlZKUsxl4fj9fvn9/sD9hoYGSVJzc7Oam5sjfSk9qqWlJXDbW9uYLOhLe9CP9qEv7UNf2iNZ+jHebYs43OzevVsXXHCB/uM//kODBw/WwoULdccdd+j888/X6NGjA/O5XC653W7V1dWpoaEhprLs7OyQbVi5cqWWLVvWYfrWrVsDIam3KisrS3QTHIO+tAf9aB/60j70pT16ez82NTXFtf6Iw838+fM1f/78wP3//M//1JgxYzRhwgSlpaW1mTc9PV1NTU3yeDwxlYULN0uXLg2ctVJqHbnJz8/XjBkzlJWVFelL6VEHDx5UWVmZJk+erOHDhye6OUmNvrQH/Wgf+tI+9KU9kqUfrS0v8RLzoeADBw7UmTNnNHz4cFVUVLQp8/l8Sk1NVU5OTkxl4aSlpXUIRJLk9Xrl9XpjfSlx5Xa7A7e9tY3Jgr60B/1oH/rSPvSlPZKlH+Pdtoh3KF68eLFeeOGF
wP2SkhKlpKTooosu0s6dOwPTq6ur5ff7lZOTo4KCgpjKAAAAYhVxuLn44ov1gx/8QG+88YZee+013X333br11ls1Y8YM1dfXa/369ZKkVatWafr06XK73Zo2bVpMZQAAALGKeLPULbfcot27d+sb3/iGMjMzdd1112nFihXyeDwqLi7WvHnzVFRUpJaWFm3fvr218hjLAAAAYhXVPjcrV67UypUrO0yfO3euqqqqVFpaqqlTp2rIkCHdLgMAAIiFbdeWysvLU15enq1lAAAA0eKq4AAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFEINwAAwFFiDjezZs3SunXrJEkVFRUqKChQdna2ioqKZIwJzBdrGQAAQCxiCje/+c1vtGXLFkmS3+/XnDlzNGXKFJWWlqqysjIQemItAwAAiJUn2gccO3ZMS5Ys0fjx4yVJmzdvVn19vdasWaOMjAytWLFCd911lxYsWBBzWTh+v19+vz9wv6GhQZLU3Nys5ubmaF9Kj2hpaQnc9tY2Jgv60h70o33oS/vQl/ZIln6Md9uiDjdLlizRddddpxMnTkiSysvLVVhYqIyMDEnSxIkTVVlZ2a2ycFauXKlly5Z1mL5169ZAPb1VWVlZopvgGPSlPehH+9CX9qEv7dHb+7GpqSmu9UcVbl5//XVt27ZNFRUVWrhwoaTW0ZPRo0cH5nG5XHK73aqrq4u5LDs7O+TzL126VIsXLw7cb2hoUH5+vmbMmKGsrKxoXkqPOXjwoMrKyjR58mQNHz480c1JavSlPehH+9CX9qEv7ZEs/WhteYmXiMPNyZMndccdd+iJJ55oEyQ8Ho/S0tLazJuenq6mpqaYy8KFm7S0tA6PkSSv1yuv1xvpS+lRbrc7cNtb25gs6Et70I/2oS/tQ1/aI1n6Md5ti3iH4p/85CcqKCjQNddc02Z6Tk6Ojhw50maaz+dTampqzGUAAACxinjk5rnnntORI0c0cOBASa3by1544QWNGjWqzY5B1dXV8vv9ysnJUUFBgX79619HXQYAABCriEdu3nzzTVVUVOidd97RO++8o2uvvVY//vGP9cYbb6i+vl7r16+XJK1atUrTp0+X2+3WtGnTYioDAACIVcQjNyNGjGhzf8CAARo8eLAGDx6s4uJizZs3T0VFRWppadH27dtbK/d4YioDAACIVdSHgluCT7g3d+5cVVVVqbS0VFOnTtWQIUO6XQYAABCLmMNNe3l5ecrLy7O1DAAAIFpcOBMAADgK4QYAADgK4QYAADgK4QYAADgK4QYAADgK4QYAADgK4QYAADgK4QYAADgK4QYAADgK4QYAADiKbZdfAAAg2Pvvv6/HHnssonkP1Tdp9zGj817YpGFnZXQ5/z333KPx48d3t4lwKMINACAuHnvsMT3xxBNRPWZ3FPOuXbs2ugahzyDcAADi4p577ol43p2HjA5kTtDZvvdUOMxla93oewg3AIC4GD9+fMSjK4t/s1Mb363VlV+/TGvmF8a5ZXA6digGAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOQrgBAACOElO4qa2t1Y4dO3T06FG72wMAANAtUYeb559/
XuPGjdNdd92lL3zhC3r++eclSRUVFSooKFB2draKiopkjAk8JtYyAACAaEUVbj777DPdfffdevPNN/X222/rV7/6le6//375/X7NmTNHU6ZMUWlpqSorK7Vu3TpJirkMAAAgFlGFG5/Pp5///Oe68MILJUmTJk1SXV2dNm/erPr6eq1Zs0Zjx47VihUr9NRTT0lSzGUAAACx8EQzc35+vubPny9Jam5u1iOPPKJvfvObKi8vV2FhoTIyMiRJEydOVGVlpSTFXBaK3++X3+8P3G9oaAi0pbm5OZqX0mNaWloCt721jcmCvrQH/Wgf+tI+Z8yZwC19GbtkWSbj3baowo2lvLxcX/nKV5Samqr33ntPP/nJTzR69OhAucvlktvtVl1dnRoaGmIqy87O7vC8K1eu1LJlyzpM37p1ayAg9VZlZWWJboJj0Jf2oB/tQ19235HDkuTRkcNH9MorryS6OUmvty+TTU1Nca0/pnAzceJEbdu2Tffdd58WLFigc889V2lpaW3mSU9PV1NTkzweT0xlocLN0qVLtXjx4sD9hoYG5efna8aMGcrKyorlpcTdwYMHVVZWpsmTJ2v48OGJbk5Soy/tQT/ah760z+u/K5EO12nI0CG6+uqCRDcnaSXLMmlteYmXmMKNy+XSJZdconXr1mnkyJFauXKlKioq2szj8/mUmpqqnJycmMpCSUtL6xCGJMnr9crr9cbyUuLO7XYHbntrG5MFfWkP+tE+9KV9UlwpgVv6MnbJskzGu21R7VD82muvqaioKHDf42nNRhMmTNDOnTsD06urq+X3+5WTk6OCgoKYygAAAGIRVbiZMGGCfvWrX6m4uFiffvqpvv/972vGjBm65pprVF9fr/Xr10uSVq1apenTp8vtdmvatGkxlQEAAMQiqs1Subm5+v3vf697771X9913n2bOnKlnn31WHo9HxcXFmjdvnoqKitTS0qLt27e3PkGMZQAAALGIep+bmTNnhjxce+7cuaqqqlJpaammTp2qIUOGdLsMAAAgWjHtUBxOXl6e8vLybC0DAACIBlcFBwAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjkK4AQAAjuJJdAOS1dGjR7Vly5Yu59t3uFZ/e2+/3v+oRnlDB3U5/8yZMzV48GA7mggAQJ9EuInRli1bdNNNN0U8/4sRzrdhwwbNnz8/pjYBAADCTczGjBmjr33ta13Ot685Q0ezL9TgugrleZsiqhcAAMSOcBOjZ599Vtu2bYtw7pd0VNJ7Ecx57rnn6otf/GI3WgYAQN9GuInRPffcE9F8r9dm6rN+uRp4Yr++MshnW70AACA0wk2Mxo8fr7Vr13Y537f+c7tK9zbqghEXa+3/++UeaBkAAH0bh4IDAABHIdwAAABHIdwAAABHIdwAAABHIdwAAABHiSrc/PGPf9SYMWPk8Xh02WWXaffu3ZKkiooKFRQUKDs7W0VFRTLGBB4TaxkAAEAsIg43e/bs0YIFC7Rq1Srt27dPI0eO1He/+135/X7NmTNHU6ZMUWlpqSorK7Vu3TpJirkMAAAgVhGf52b37t1asWKFvv3tb0uS/u3f/k2zZs3S5s2bVV9frzVr1igjI0MrVqzQXXfdpQULFsRcFo7f75ff7w/cb2hokCQ1Nzerubk51j6IK6Mzgdve2sZk0dLSErilL2NHP9qHvrTPGXMmcEtfxi5Zlsl4ty3icDN79uw2999//32NGzdO5eXlKiwsVEZGhiRp4sSJqqyslKSYy8JZuXKlli1b1mH61q1bA/X0Ng31kuRRQ32DXnnllUQ3xxHKysoS3QRHoB/t
Q19235HDkuTRkcNHWFfaoLcvk01NXV9rsTtiOkPxqVOn9Mgjj+jee+/VRx99pNGjRwfKXC6X3G636urq1NDQEFNZdnZ2yOddunSpFi9eHLjf0NCg/Px8zZgxQ1lZWbG8lLhb/+n/SL4mZZ2Vpauv/lKim5PUDh48qLKyMk2ePFnDhw9PdHOSFv1oH/rSPq//rkQ6XKchQ4fo6qsLEt2cpJUsy6S15SVeYgo3DzzwgAYMGKB//dd/1QMPPKC0tLQ25enp6WpqapLH44mpLFy4SUtL6/AYSfJ6vfJ6vbG8lLhz/XO3JpdSem0bk4Xb7Q7c0pexox/tQ1/aJ8WVErilL2OXLMtkvNsW9aHgf/7zn/XLX/5Szz33nLxer3JycnTkyJE28/h8PqWmpsZcBgAAEKuows1HH32k+fPn64knntD5558vSSooKNDOnTsD81RXV8vv9ysnJyfmMgAAgFhFHG5OnDih2bNna+7cufrGN76hxsZGNTY26oorrlB9fb3Wr18vSVq1apWmT58ut9utadOmxVQGAAAQq4j3udmyZYt2796t3bt368knnwxM//jjj1VcXKx58+apqKhILS0t2r59e2vlHk9MZQCA3m3t6x9q/2cnbKuvbG9j4PaBTe/aUmfuwH668yvjbKkLySXicDN37tywZxAeNWqUqqqqVFpaqqlTp2rIkCFtHhdLGQCgd1r7+od6eMv7cam7us6v6r9/YmudBJy+J6ajpULJy8tTXl6erWUAgN7HGrE5Z+gAXTbanv0kDx2r1z8+rdXE/EEalnNWt+v7+8fHVHW40dbRJSQP28INAKBvuWx0jpZfd5Etde3fv18lJbUqKBij3Nzcbtf3wKZ3VXW40YaWIRlxVXAAAOAohBsAAOAohBsAAOAohBsAAOAohBsAAOAohBsAAOAohBsAAOAohBsAAOAohBsAAOAohBsAAOAoXH6hHbuvdFt97GTg1q4r3Upc7RYAgHAIN0HieaXbo02ntcHmK91KXO0WAID2CDdB4nGl27erD2vXoZO6YFi6Lhk11JY6udotAADhEW5CsPNKt8+8VqFdW2v0rUnDtOCrF9pSJ1e7BQAgPHYoBgAAjsLITQgfHT2uF9/eZ0td7x5oDNzaVedHR4/bUg8AAE5EuAli7cOyY0+tduyptbXuje/WauO79tbJPjcAAHREuAly8vQZSa07FN9l01FI+w4d0Y7KGk09f6Tyhg2xpc5fvP6hqg43BtoLAAA+R7gJYUhmmuZekmdLXfv3uzTCX6OCiUOVm5trS50vlH7KDsUAAITBDsUAAMBRCDcAAMBRCDcAAMBRCDcAAMBRCDcAAMBRCDcAAMBRCDcAAMBRCDcAAMBRCDcAAMBRCDcAAMBRCDcAAMBRCDcAAMBRCDcAAMBRuCo4ACAujh49qi1btkQ0777Dtfrbe/v1/kc1yhs6qMv5Z86cqcGDB3e3iXAowg0AIC62bNmim266KarHvBjhfBs2bND8+fOjbhP6BsINACAuZs6cqQ0bNkQ07xvvH9J/f+LWtV9o0bTxwyKqGwiHcAMAiIvBgwdHPLpy6rUKbdlao8IrR2r+Vy+Mc8vgdOxQDAAAHCXqcFNbW6vRo0eruro6MK2iokIFBQXKzs5WUVGRjDHdLgMAAIhFVOHm6NGjmj17dptg4/f7NWfOHE2ZMkWlpaWqrKzUunXrulUGAAAQq6jCzY033qgbb7yxzbTNmzervr5ea9as0dixY7VixQo99dRT3SoDAACIVVQ7FBcXF2vMmDFatGhRYFp5ebkKCwuVkZEhSZo4caIqKyu7VRaO3++X3+8P3G9oaJAkNTc3q7m5OZqXEpI5cyZwa0d9ktTS0hK4tavOeLQzGcSjL/si+tE+fbUvz/xzHXTGxnXQGdMSuLWjzni0MRkkyzIZ77ZFFW7GjBnTYVpDQ4NGjx4duO9yueR2u1VXVxdzWXZ2dsjnX7lypZYtW9Zh+tatWwMhqTuOHpXcmbu0p3mP
/p/f2zuK9N9v/rdtde1pltyZY3X06AV65ZVXbKs3WZSVlSW6CY5AP9qnr/VlTY0keVRTU61XXqm2pc7qI611Vn9crVcau19nPNqYTHr7MtnU1BTX+rt9KLjH41FaWlqbaenp6Wpqaoq5LFy4Wbp0qRYvXhy439DQoPz8fM2YMUNZWVndfSl6/mCJDng36oTbr7d6b+CVMqV+GeUa3Hylrr66INGt6TEHDx5UWVmZJk+erOHDhye6OUmLfrRPX+3Lv/93pf56eK/O9B+i03kjbKnT59srqU6+dHvqPHOwtb6RI0fp6qvP73Z9ySJZlklry0u8dDvc5OTkqKKios00n8+n1NTUmMvCSUtL6xCIJMnr9crr9XbjVbRypaToxIFvKm/Yfn39wq5PIhWJpuNNOnzosIYOG6qM/t0fXZKkP1cc0r5DuXINTbHldScLt9sduO1Lr9tu9KN9+mpfHmxo3T3gbx/X6W8f19la94sVdXqxwr46Dzb4+9R7kyzLZLzb1u1wU1BQoF//+teB+9XV1fL7/crJyYm5LJFafJM0YuhX9cPCQlvq279/v0oaS1RwXoFyc3NtqXP3uzv1ia9WGmpLdQAQlZOnW/dnOWfoAN31lXG21Lnv0BHtqKzR1PNHKm/YkG7X94vXP1TV4cZAW9G3dDvcTJs2TfX19Vq/fr1uueUWrVq1StOnT5fb7Y65DADQ+w3JTNPcS/JsqWv/fpdG+GtUMHGoLT8EXyj9VFWHG21oGZKRLfvcFBcXa968eSoqKlJLS4u2b9/erTIAAIBYxRRu2p9JeO7cuaqqqlJpaammTp2qIUOGdLsMAAAgFrZdODMvL095eaGHJ2MtAwAAiBYXzgQAAI5CuAEAAI5CuAEAAI5CuAEAAI5i2w7FAIC+IyVtv+pcVXr5o6O21PdZ3Wf6yP+RfAd8GnhyYLfrq3N9qJS0/pIGdbsuJB/CDQAgaum5v9Ve7xEtfdPmiiu6niUiXik9d4iki2yqEMmEzVIAgKileD9LdBO6lAxtRHwwcgMAiNqJA9/q1RcZti4wzDX4+ibCDQAgar39IsNcYLhvY7MUAABwFMINAABwFMINAABwFMINAABwFMINAABwFMINAABwFMINAABwFM5zAwCISvPpM5Kk/Z+d0Itv77Olzn2HjmjHHmlv2mHlHTLdru+Iz29Dq5CsCDcAgKgcaWwNDtW1TVr0u3dsrNmjvx7eJ8mewCRJ6R42UPRFhJt2evuVbiWudgsgsS7KO0vVtU0qGJmt+YUjbanzjcpPtfHdWn3zokGadn5+t+t7ofRT7dhTq9yB/WxoXXJobGzUbbfdpvfee08TJkzQH/7wBw0YMCDRzUoIwk07acNe0l7vx733SreS5JXSho0WV7sFkAhn9fNKksYPz9TcS/JsqbOurk4b363VRWcPsKXO0upj2rGn1oaWJYdLL71UJSUlgfs1NTXKzMxUQUGB3nrrrQS2LDEIN+34D83RqPzjuuur42yp77O6z/TRno80ZuwYDcweaEudv3jtQ71/qL80wpbqAABJzAo2LpdL3/zmN/WlL31J//M//6ONGzeqpKREl156aZ8LOISbIE2nTuuMP1eH9nu1s/xsW+o8dGyg/vFppib6BmlYzlm21HnscLrO+NlZDgD6usbGxkCwaWpq0rFjx1RSUqJvf/vb2rBhgzIyMlRSUqLGxsY+tYmKcBPkk9omSdKxpmZt+PsnNtbs0Z+r6iXV21inlJPB2wcAfdnNN98sSbrpppuUkpKiJ598Um+99ZbeeecdLV26VPPmzdNvfvMb3Xzzzdq0aVOCW9tz+HYM8q0pI1T85se6OP8sXZhrzyjL/3xwSNV1fo3KTtOXzh1mS50fHT2uHXtqNf18e0aXAAC92/vvv6/HHnusw/Q33nhDkvTWW28pPT1dJsUjz1nD9MqWP2vZsmUaN25cYL4777wzZN333HOPxo8fH7/GJwDhJsj5/ww0t04dbdtOcot/c1zVdX5N
HjFAy6+zZwfgF9/e16d2lAOAvu6xxx7TE088Ebb8/fffb/2npVmnj+2VJBlJVVVVkqRjx451+vi1a9fa1tbegHCDpMAhjgD6snvuuSfk9BMnTmjdunWSpH/5l39RWZ1XBzIn6Gzfe5qc3aynnnpKknTrrbeqX7/Qh8WHqzuZEW7Q63GII4C+bvz48SFHV37+858H/n/66ad1wayb5f1CrpoPl+npTc8GyiZNmqRFixb1QEt7B07diF4t+BDH4cOHB/5cLlfgEEcA6Kv27NkjSbr44otljFHF5vX69OffVsXm9TLGaNKkSW3m6ysYuUGvZR3iKEnGGB08eLDDPH3xEEcg0eqaTkmStn9wRA9seteWOt+uPixJ+kP5Ie2p7/61pf7+8bFu15EMxo4dK0m66667dOONN2rK3H/RsYyRymmq0f+++JSee+453XHHHYH5+grCDXot6xBHSSFPTmWMCczXlw5xBBLtH3tbT2vxad0Jm0+bIe06dFK7DtlXp9Mvv3DnnXeqqKhIDzzwgG699VZds+BebXy3VtdcNEjp6en60Y9+JI/HE/ZIKaci3KDX+uCDDwL/hzo5lbVzXPB8AOLvJ9+4UD/8Y4UmjjhL2RmpttT56rv7dbTptAZneDTrolxb6swd2E93fsWes833Vqmpqbr33nu1evVqjRgxQhOvXyhlTlLFjtc0YslcHTp0SEVFRUpNted9ShaEG/RatbWth7uPHj1a6enpbcrS09M1cuRI1dTUBOZD1zjqzD59uS+vnDBUb074apfzHT16VFu2bImozv/v4EEpa4I8Byt03tiuL1w8c+ZMDR48OKK6ne7hhx+WJD366KPa/l9P6+xbH9P2/3paZ2prVVRUFCjvSwg36LVycnJ06NAhVVdX6+TJk23KTp48qU8++SQwH7rGUWf2oS8js2XLFt10000RzZs6bKzOvvUxvb3xCd30RNc7v27YsEHz58/vbhMd4+GHH9by5cs16+6f6iNJl39jvl79jx/0uREbC+EGvdb48eO1e/duGWOUkZGh6667TpdffrnWr1+vTZs2Bfa5cdqZNeOh/ZdxsL56Yb1Y0ZeRmzlzpjZs2BDRvOtLDup9SVfe+G+6pWB4RHU7xdrXP9T+z07YUteZMVdIdX6dGXOFfvyn922pU0q+TXyEGyRUuFOKS9KgQYMC/xtjtHHjRm3cuDHkfKF2lnPiKcVjEXzUmSRdf/31gR2z/+u//ksSR51Fir6MzuDBgyMeXflf7dT779bqgoLLNX9+YZxb1nusff1DPbzFvhBiqa7zq9rmnb0lJU3AIdwgoVatWhU4u2asrDNwtnfixAk988wz3arbCW644YbA/ydOnAi7Y/YNN9ygP/3pT4lqZlKgL6PT2Y+X9nYeMvJnTtBfql7WnX9d3+X8TvnxYo3YnDN0gC4bHX4Te5OvQdW73+6yvtrGk/q0UcofIA0akN7l/JI06rxLlJGZFbb87x8fU9XhRttGl3oC4QZxE8lQ675+Y+TJGdHpPGdOndCZps+kMy2fT0xxKyVjoFJSwx/mua/fmC7PwZFsQ62xePPNNyVJs2fPDrlj9lVXXaXNmzcH5kN49GV0uroeUigHJXX9Fd7KKddDcmeWy5+1X/vd4Uf7Dvhq9M7ev0Rc5wefSfossnlPjLhSZw8cGbbcn9Uo94lcSV+I+PkTjXCDuIh4qDVrsvJu/2WXs5kzLfLv3aWWxjq5B2QrbcQFcqW4O33MB5I+iHBY1ukBB0iEaK5ZdKi+SbuPGZ2X49KwszJsrbs3q65tUr+z/6A6d7NK6jqZcYA0eHp8jg7bqwrtrasIP4NX6ne2V/s/mx6X548Hwg3iorT6mNzZf5G3f5VtdfZr86Ph77bU2Xz8HJVWD7GlrkT729/+ph/+8Icdpns8rR/zl19+WV/5yldU3+zSAddgnW0e01leo7/85S+B+aZPD73y+slPfqIvfvGLcWt7bxOvvuxr/Rjuekih7N+/XyUlJSoo
KFBurj3nuUkGe+uadCZroNzuI4luSqfONA+UkujAq4SHm4qKCi1YsEAffvihvvvd7+rhhx+Wy+VKdLO6FOm25L66HfmQ76T6Ddsil6v7p1GPJ8+Aj3Sy+fpEN6NLRb8v174uNvG9+dtfaO/r2zqdx/rylVqH/4PV1dVp27bQj3//zHBd8X86/1zmDeyn1f/3pE7nSbRI+lGKX19G0o9ScvQl7DFyUD99UvN/lJl5tNOzKZ/2n1T9kX0dppuWM/q4dJtSvOkadfHl8p0yOuHur34tx5WZ6lL1O3/VmeaTGv1/fU0ud+jLSZ41JE+etPD75xz2nVTdZ4OUe3HynO05oeHG7/drzpw5mjlzpp5//nktXLhQ69at04IFCxLSnoaTzZKkNz7oOkE/9cTTevnJyLclR7od+dPTmfqXf1vY6Tyl1b3/mikfH22S/8TMLkdujGnRmVP+uLQhJTVNLlfnm66aj5+jvDG9+wN769N/15tH/tBlX2Zc16Jx13w9ZJk56VNz3X7JlSKZM58X/PO+NztXrvTMsHWnpNarzL+q0+f/+0fnSL9Xr/1SjrQfpfj1ZST9KPX+voR9PjrSpDP+XNX7c1XfybkLzzSfVHNt6HWVd9i5kqRPg7bC+9W6y40751y5Je39NHzdB30jlOLteufjZLqURULDzebNm1VfX681a9YoIyNDK1as0F133RU23Pj9fvn9n38RNjQ0SJKam5vV3Nzc7faU1bSGho1v79PGtzsm5GCn0ydp0OwlEdTqksvtlWlpltT1KMbb6ZO06HfvRFCvdFZ6ii2vOx6WzjpHK1+VRqXP1lnp3rDzvb31D/pw46NxacP4b96rS2Z8q9N5ckelacXc83ttP0rSW9V16jem+6Ng/UaEPxpCavznX+w8Az7SmTM39tq+tKsfJfoyHlpaWgK3fel1Pzh7vJa9/L4uysvUwH7ht/u8uuEXevvFJ+PShkvm3q5ZN93V6TxnD0zT7V8aadt7E+/3OKHhpry8XIWFhcrIaN15bOLEiaqsrAw7/8qVK7Vs2bIO07du3RqoozvON9K+wSkaOeCM+nXRMxUl/9CLL/+s28/Z3tx/vV8XTux6m/wAr9TwwVt6pZdeVilL0sopUuvvh/DSMo8p4jM8uL3ynDVMp+sPSS1dfzAKM4/puuFdj8K98sreSFuQELNzpRcPfV39sj6Uu5MtGr6jB3Tqs0NxaUPqwGHKHHx2p/NkNY/VtOy9vbY/I+1HKX59GUk/Sr2/L+OprKws0U3ocfdNkCRfp/PkTZ+kl0/O6jC9urpa7733ni644ALl5+frpPHqkCtbw0yd0l3N+uSTT1RZWakJEyZo1KhRIeuePX2SRniqO29ko/TKK/adj6epqcm2ukJxGes0rwmwZMkSnTx5Ur/4xS8C04YMGaIPPvhA2dnZHeYPNXKTn5+vo0ePKiurs19S9jt69Kj+/Oc/dznf/sN1+nvVAV12ztnKHdrxNbX39a9/vU9dLyXSfpToy67s3LlTDz74YJfzfXZSOuAerLNbjmpgZKfB0LJly1RY2HdOrBavvuxr/RiNgwcPqqysTJMnT9bw4V2foRitTp06pYEDByonJ0fV1dU6evRooB8HDx6sUaNG6dixY/rss8961aUYGhoaNHjwYNXX18fl+zuhIzcej0dpaWltpqWnp6upqSlkuElLS+swvyR5vV55veE3fcTD2WefrVtuuaXL+fbv36/z+uARAJGKtB8l+rIrV1xxhV577bUu5+urR6VEg77seW63O3Db0+vzZOb1egNXBR89erQWL16sIUOG6Le//a3WrFmjw4cPq6ioSP379090U9uI93uc0HCTk5Ojioq2x9b7fL5elS4BAOjNgq8Kfv/99wemezyePntV8NDHhfWQgoIC7dy5M3C/urpafr+fqzwDABCFhx9+WMePH9dDDz2kq6++Wg899JCOHz/eJ4ONlOCRm2nTpqm+vl7r16/XLbfcolWrVmn69OmB4UkA
ABCZ1NRU3X777br44otVUFDQp7eCJHyfm+LiYs2bN09FRUVqaWnR9u3bE9kkAACQ5BJ+huK5c+eqqqpKpaWlmjp1qoYMccap8AEAQGIkPNxIUl5envLy8hLdDAAA4AAJ3aEYAADAboQbAADgKIQbAADgKIQbAADgKIQbAADgKIQbAADgKIQbAADgKL3iPDexMsZIar10em/l8/nU1NQkn8/Xq9uZDOhLe9CP9qEv7UNf2iNZ+tFqm/U9brekDjc+n0+SlJ+fn+CWAACAaPl8Pp111lm21+sy8YpNPeDMmTPav3+/MjMz5XK5Et2ckF599VXdcMMN+t3vfqdZs2YlujlJjb60B/1oH/rSPvSlPZKlH40x8vl8ys3NVUqK/XvIJPXITUpKikaMGJHoZnQqIyMjcJuVlZXg1iQ3+tIe9KN96Ev70Jf2SKZ+jMeIjYUdigEAgKMQbgAAgKMQbuJszJgxcrvdGjNmTKKbkvToS3vQj/ahL+1DX9qDfmyV1DsUAwAAtMfIDQAAcBTCDQAAcJSkCDenTp1KdBMc4corr1ReXl6n8zQ2NuqnP/1ph+lPPvlkxM9z7NgxXXXVVfrb3/4WdRuTwfe//33dd999iW6GLX7/+9/rxRdfTHQzeq0RI0Zo8ODBEc1LX/YteXl5bd7vRx55RFdffXXEj2c9GWeml3v11VeNJDNv3jxjjDE1NTXG5/O1maempsbU1NR0WdfUqVPNkiVLOkyvra2N6PHGGOPz+SKet7cZMWKEkWSqqqrCzpOenm4kmR07dhhjjKmqqjI1NTVGkhk0aJAxxhi/32927doVtg7rPbvkkkvsfQFRyMvLM8GLd6TLiM/nMyUlJebAgQNh53G5XKarj05xcbGRZB5//PHIG90NL730kpEU0V8wl8tlXC5Xh/oGDhzY5WvsC0L1WTj0Zfdce+21ZuHChT3+vI8//rhxuVymtrbWGNP6nns8HmOMMcuXLzeSzPbt29s8ZtOmTUaSSU1NDUyz1gt+vz+i52U9Gd/1ZK//xLXvIOt++z9rYZw0aZIpKCgwhYWFprCw0BQUFJiLLrrIGBN+RZWTkxPxF0OoOjZt2mQuuugik5KSYs466yzbXnv7Ba+7fD6fkWR++MMfBqYFL1zXX399yNeakpLSZR8EKykpMZLMbbfdZlvbo3Huued2eJ1paWlRvb/nnHNO2Pqt/ujKkCFDQq4Y42HXrl1Gklm0aFFgmqQ2X7YXXnhhh3anpKSYlJSUNo8pLCw0w4YN6/NfyFafXn755RHNT192bfv27UaSuf766zuUWZ+9kpKSiAPCrl27AqEklJKSkg7TUlJSTEZGRuC+9ePN6/UaY1q/Y6z/w63r+vXrZySZV1991ezatcv4fD7zzDPPGK/XG/jxeODAgU5/SLKebBWv9WSv/sTl5uYaSWblypURPybcm/D8888bSWb58uUR1WO9yZ0ZNGhQm+fwer3miiuuiCgQdSXUgifJzJo1yxhjzMKFC40kk5mZGbYOq/+i+cvLyws8V3p6urnkkksC4eeFF14wksydd97ZadurqqqMJPOd73yn0/kmTZoUUZuysrIi7DVj1q9f3+FLPhRJZsCAARHXGyzSD60xkS1Hdjhw4EDU4aakpCTwWoYPH25qa2uNJLN27drAsmNM68hmZ7/QnCT4i9Ia6QwneAQ5Gfvyhz/8YZvP2fLly01WVlaHHzbWF7S1/AT/jR07NuLns35cpaWldSizPrdz584Nux646qqrOjwukvVHe9aoS2ZmZtTrR2M+D0Mul8ukpqZG/fwW1pOfi8d6steGmwcffNBIMkOHDjVXXXVVVAHHmI4rcmtzS6QLniQzYsSITp9j165dZseOHWbRokVt6nK5XG1+GbQv70q4Bc8KN9aKs6s6Q62cFy5c2Gbo11qBBS9cksySJUsCv3rGjBkTGBmTZL785S93+rzWezd16tRO57v88su7fA2STHZ2dqfzBPN4PG2Gijur1wpy0Yqk
79vPH2pzqJ2scBPpCnrt2rWB996a/rWvfS3sY6xfsk4Xy5ddsval1Z4777yzzRekJLNmzZo2P46M+Xyd+sILL5iVK1ea/v37GynyH4wejyfkZjurzHqef//3fzePP/64KS4uNvfff38gSNjJ+qEmtW4WWr16dad/I0eODLTPGrVJSUlpE1Zvv/32NuuFroIs68mO89u5nuy14cbv95thw4a1WWl3NsxXVVXVZkEKDjdW0rY2T1kfsuBhsPa/WCJJ4tZzWuHF7/ebqqqqboebcAueFW6slehLL73UYZ7t27cHQpH1gZw6darx+/2mpqbGpKWltVlRWB9S61dE8J/b7Q4bCjt7PePHjzeSTP/+/Tt9nbNmzYroQ2vt6xMJSWb16tURzRfqzwpxkTw20qFzj8dj0tPTI5o3Vtb7F+nITWFhYeC9d7vdHd7X4NGGvsjqzyuuuKLNdEkmNze3zbRk7MsbbrjBSK2bVYwxZsmSJUZSh1/QY8eONZLM+vXrQ27WlGRycnK6fL4dO3YYSSH31QsOGu1ZuyGE2vS0Y8eOiEbBysvLQ04fOnSokRQYBe/szxpR+ta3vhWYFrwJ0piO4aYrrCfbsns92bs+cSFYw36bNm0yAwYM6HQBzMjIMG6326SkpAQ+iNYXurUQG2NCvjkZGRmBBSh4m2zwAmsFIEu4dnQ33IRb8CQFfjWEWulGEsomTZrUZljYCjHt6xgzZoxZvXq1ueqqqwL9N2/evMDK7KqrrjLFxcUh2x/8C7Yz8frQdsXa1r1t27YOjw01ZB7qsVLkQ/KDBg2y/Zdne9a+DJEuB9bybn0hW6OB1jb03viF3JOsL75gV111lZHUYT+OZOzL9uuy2tpak5KSEljfWqwfl+edd17YcBPJfoajRo0K2wftl02LFRqDN82He1xXf6ECjvWj16or1PNYbbAEb4ayws327dtNTU1Nm++K2trawEEZ4bCebMvu9WTv+sS1M2PGjDZvvs/na7Od29ovJbjM2qkvePtwVlZWm2HPUAuCFZyMiTzcWKxfF9ZCYAUtSyzhJtx062/u3LkdytqPyET6Z31Ig3fWzsrKMkuWLDEej8d4PB7j9Xrb/Hk8npCbp/x+v5EU2Bl63LhxYV9nvD60Xe3tb/0aDfXYrlbU1jJg7QQXiczMzIh+6djJWimvXbs2ZPm4cePM2LFjA1/Ifr/fjBs3zmzbts34fL42X8g1NTWd7rTpRFLHzUfhjv5Ixr70er0hX0uodZz1GWwfbs455xwjydxxxx1dPl9GRkbIzXHWZ9H60WaxQlXwerS9l156yWzbts2Ul5e3CTHWur68vNyUl5eHHOEOfm2R/DCw+Hy+wCiUtd6M5HHtsZ7syO71ZK8NN8HbgcN1TnC4sbQfubFY226tBbn9Hurtw02ohTTUB3/NmjVt5rvjjjuiXtDbC7fgdVafJHPuueeGnNe6H+oDNHDgQJObmxt252NrR+z2f8OGDQvZduvIs5qamsA2+XCHjcfrQ1tQUNDlPKE+RFLn+1lZv/T69+8fWPlGsp27s/6ySzRhNngHQesLuat6Ro4cGdf29ya7du1qsw4YOHBg4JDgm2++Oezjkqkvg3/sBf+osTZDB5NaR2tD7VAc6U6s2dnZHTbjGGPM6tWrzaBBgzqMlFmjxe1P+xFOcNgIF9zCPa79ZhNrR+FIn896fPvNUp1tjmE9Gfr57FxP9tpws3z5cuPxeDrdizqacGPM56MboeqLdeQmuM7gtgZvson26IhwC57Uujnqy1/+spE+P3LKKgsVboJ/XYwZM6bL57X+MjMzzW233RYYXmw/3/Dhwzs83gp61j4J1tER4d6/eHxorSPYwrE2LaxZsybkc02ZMiXsY60vPGuFO2bMGCN1frSD1Sfhtvvbpf35n4K/vIIFvz/GdPxCtvTGTSk9rba2NvCla/1t2rQp7PzJ1JfBIeaZZ54JjC63X99am9iCN0stWrTIPPjggxHvS2GMMV/96lc7
7YPgcLNt2zYjyUycODHi+q11ljGRhxtrh95I/4I3RwavWy2R7nPDerKjeKwne9cnLoTuhpvly5ebSZMmGWM+314aang0lpEb6wvEWqgPHDhgSkpKAm/UvHnzAiMf1kkIIxFuwQsONO0/DOHCjRWSugo37UegrF8TwTv7Bf+136ny8ccfD/kB/c53vhMIgO1DXjw+tNaKItQh8tZKM9QvSGuYONz7ZH0ZtN8caAXbGTNmdHiM9Qsm1kMpY2Utc6F29Gzf1mT6Qk6ElStXdlj2U1JSQoacZOpL6/Bia4di60dM+/XteeedFwh1oX4wRkOSmT17dsiy4HATbp2/fv36kI+1RuWtI20iDTculyuwA/gLL7zQZnrwyI212av9a4kl3LCe7Ln1ZO/6xIUQTbgJt+3U2vnJCi2hhlJjGbmZPXu2uf322zvsU2MtpBbrw9Z+x6xwwi14weHGOtTU2u8lXLgJvh8u3FgLl6TACszv95ucnJyIRm6sbbPBaT2Y9eGUPj9izRjT6TmBgv+iOcTRmNZDSaW22+uDQ1rwyuOll14ygwYNCiwb7YeGrXNiSK0nZGvP+rC3/9ILXhZ7yqZNmwJnww31vNYO4cFfzC6Xq82Okdb+IMFfyD6fr0dORNgbHDhwwMybN6/NOVA8Hk9gubBGTUN9mSRTXwaPFixatCiw/Fvr2zVr1pj8/Pw267Luhptx48a1CVTBgsON1LopsL1w501pv7y3Dzfz5s3rMBJufXf88Y9/7DTcXHjhhYFlof1zpqSkmG3btplNmzaZ8vLywJFU5eXlpqSkxGzatMkUFxcHRrhYT/bserLXh5v2e+9bvva1r3VYiIO3HWdmZgbOjXPbbbcFplsfWKnt+QWskYpI/toLDkDWmx+8z0zwGxupUAtecLix7lt1zpgxI7Ap7NprrzVS27NHzpo1K+TOpcELl5Wcpc8vv/DMM890+kGynsvj8XS6fdw6YVhw6LNOENiZUCEvEsXFxW1ev3XOn/a/uIPfm+AVSvDzS53va2HM58HXOiLP5/NF9UuqO4KPTJBazw0VvMkg+LxI7fs7+JIB0S73ThR8wrSsrKywP0gyMzM7/ApNtr5sv19jcLgJDnbW57q74caYz4NH+80dwTueSq37/gR/gVrrj+DRhODTV9x///0dnsN6fPtQZIXNa6+9NlBH+01Owef1kjpuhrfWZeHOmB/898ILL7Ce/KeeXE/2jk9ZJ8LtO2AtVMHHxW/atKnDeXCCD92zfjEE7/T7zDPPGGNMh731Qwl3tJS1PdnatPP88893mGfhwoXG6/VGvIOcMR0XvGiEOu14KKEWrgcffDCqdoYbLg4lmu30vcGOHTs6vY5WsES+tttvvz3sofnGGHPzzTeb22+/vQdbBISWm5vb4TDp4Otvhdpx2fqzTpFhnThR6njm4lAjHcHr0cLCwoRcw4r1ZKueem0uY4yRg73xxhtavXq1XnrppQ5lP/jBD0JeARsA0LtdeeWVWrRokebOnZvopqAXcny4AQAAfUtKohsAAABgJ8INAABwFMINAABwFMINAABwFMINAABwFMINAABwFMINAABwFMINAABwlP8flQDLS5sIs1gAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "#设置中文字体\n",
    "plt.rcParams['font.sans-serif'] = ['SimHei']    #使用黑体\n",
    "plt.rcParams['axes.unicode_minus'] = False    #正常显示负号\n",
    "# 对北京地区信息进行异常值检测\n",
    "file_data_bjinfo.boxplot(column=['行政面积（K㎡）', '户籍人口（万人）', '男性', '女性', 'GDP（亿元）', '常住人口（万人）'])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "11beabcc-f525-4e56-8aa2-9eb59537a8ea",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAGcCAYAAAAlG4EeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAABBkElEQVR4nO3de1yUZf7/8TfM4CAKCuIhiRSxtFKzyDIrXUukUtPMtjK3crP6tu5WHqi1Wlt1U7fSyi0rO/m1k53MXMuSytStsDC1CG09YeYZRc6Ow3D9/vDL/EQODuONMLev5+PBQ+/7uuaaaz7M3LznnnvuO8QYYwQAAGATofU9AQAAACsRbgAAgK0QbgAAgK0QbgAAgK0QbgAAgK0QbgAAgK0QbgAAgK0463sCJ6KsrEw7d+5UZGSkQkJC6ns6AADAD8YYFRQUqG3btgoNtX4/S1CHm507dyo+Pr6+pwEAAAKwfft2nX766ZaPG9ThJjIyUtKR4kRFRdXzbKrm8Xi0dOlS9e/fX2FhYfU9naBGLa1BHa1DLa1DLa0RLHXMz89XfHy87++41YI63JR/FBUVFdWgw01ERISioqIa9BMtGFBLa1BH61BL61BLawRbHevqkBIOKAYAALZCuAEAALZCuAEAALZCuAEAALZCuAEAALZCuAEAALZCuAEAALZCuAEAALZCuAEAALZCuAEA1Cuv16vly5drxYoVWr58ubxeb31PCUGOcAMAqDcLFixQx44dlZycrJkzZyo5OVkdO3bUggUL6ntqCGKEGwBAvViwYIGGDRumrl27auXKlXr77be1cuVKde3aVcOGDSPgIGCEGwDASef1ejVu3DgNHDhQCxcu1MUXX6zGjRvr4osv1sKFCzVw4ECNHz+ej6gQEMINAOCkW7lypbKzs/XQQw8pNLTin6LQ0FBNmDBBW7du1cqVK+tphghmhBsAwEm3a9cuSVKXLl2qbC9fX94PqA3CDQDgpDvttNMkSZmZmVW2l68v7wfUBuEGAHDSXX755Wrfvr2mTp2qsrKyCm1lZWWaNm2aEhISdPnll9fTDBHMCDcAgJPO4XBoxowZWrx4sYYMGaL09HSVlJQoPT1dQ4YM0eLFi/Xkk0/K4XDU91QRhJz1PQEAwKlp6NChev/99zVu3Dj17t3btz4hIUHvv/++hg4dWo+zQzAj3AAA6s3QoUM1ePBgLVu2TEuWLNHVV1+tvn37sscGJ4RwAwCoVw6HQ3369FFRUZH69OlDsMEJ45gbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgKwGHm7/+9a8aNGiQbzkzM1M9evRQdHS0UlNTZYw54TYAAIDaCijcZGZmavbs2Xr66aclSW63W4MGDVJSUpIyMjKUlZWluXPnnlAbAABAIGodbowxuvvuu3X//fcrMTFRkrRkyRLl5eVp5syZSkxM1NSpU/XKK6+cUBsAAEAgnLW9wUsvvaS1a9dq1KhRWrx4sVJSUrRu3Tr17NlTERERkqRu3bopKytLkgJuq4rb7Zbb7fYt5+fnS5I8Ho88Hk9tH8pJUT6vhjq/YEItrUEdrUMtrUMtrREsdazr+dUq3BQWFuqRRx7RmWeeqd9++02vv/66HnvsMfXq1UsJCQm+fiEhIXI4HMrNzVV+fn5AbdHR0ZXuf9q0aZo0aVKl9UuXLvUFpIYqLS2tvqdgG9TSGtTROtTSOtTSGg29jsXFxXU6fq3CzYIFC1RUVKQvv/xSMTExmjBhgrp27apXX31VI0eOrNA3PDxcxcXFcjqdcrlctW6rKtxMmDBBY8eO9S3n5+crPj5e/fv3V1RUVG0eyknj8XiUlpam5ORk
hYWF1fd0ghq1tAZ1tA61tA61tEaw1LH8k5e6Uqtw89tvv+niiy9WTEzMkRs7nerWrZuys7O1b9++Cn0LCgrUqFEjxcTEKDMzs9ZtVXG5XJXCkCSFhYU16F+iFBxzDBbU0hrU0TrU0jrU0hoNvY51PbdaHVAcHx+vkpKSCuu2bdumGTNmKD093bcuOztbbrdbMTEx6tGjR0BtAAAAgahVuBkwYIDWr1+vF154Qb/99ptmzZqltWvXqn///srLy9O8efMkSdOnT1e/fv3kcDjUu3fvgNoAAAACUauPpWJiYvTpp59q3LhxGjt2rNq0aaP58+erY8eOmjNnjoYPH67U1FR5vV4tX778yB04nQG1AQAABKLWXwXv2bOnvv7660rrhwwZoo0bNyojI0O9evVSy5YtT7gNAACgtmodbmoSFxenuLg4S9sAAABqgwtnAgAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAW6lVuPnLX/6ikJAQ30/Hjh0lSZmZmerRo4eio6OVmpoqY4zvNoG2AQAABKJW4Wb16tX6+OOPlZubq9zcXK1Zs0Zut1uDBg1SUlKSMjIylJWVpblz50pSwG0AAACB8jvclJaWKjMzU71791bz5s3VvHlzRUZGasmSJcrLy9PMmTOVmJioqVOn6pVXXpGkgNsAAAAC5fS3448//ihjjLp3764dO3aoT58+mjNnjtatW6eePXsqIiJCktStWzdlZWVJUsBt1XG73XK73b7l/Px8SZLH45HH4/H3oZxU5fNqqPMLJtTSGtTROtTSOtTSGsFSx7qen9/hZv369Tr33HP1r3/9S7Gxsbr33nt1991365xzzlFCQoKvX0hIiBwOh3Jzc5Wfnx9QW3R0dJVzmDZtmiZNmlRp/dKlS30hqaFKS0ur7ynYBrW0BnW0DrW0DrW0RkOvY3FxcZ2O73e4ueWWW3TLLbf4lp999ll16NBBnTt3lsvlqtA3PDxcxcXFcjqdAbVVF24mTJigsWPH+pbz8/MVHx+v/v37Kyoqyt+HclJ5PB6lpaUpOTlZYWFh9T2doEYtrUEdrUMtrUMtrREsdSz/5KWu+B1ujtW8eXOVlZWpTZs2yszMrNBWUFCgRo0aKSYmJqC26rhcrkqBSJLCwsIa9C9RCo45BgtqaQ3qaB1qaR1qaY2GXse6npvfBxSPHTtW7777rm/5+++/V2hoqLp27ar09HTf+uzsbLndbsXExKhHjx4BtQEAAATK73DTvXt3Pfzww1qxYoW+/PJL/eUvf9Htt9+u/v37Ky8vT/PmzZMkTZ8+Xf369ZPD4VDv3r0DagMAAAiU3x9L3XrrrVq/fr0GDx6syMhIXXfddZo6daqcTqfmzJmj4cOHKzU1VV6vV8uXLz8yeIBtAAAAgarVMTfTpk3TtGnTKq0fMmSINm7cqIyMDPXq1UstW7Y84TYAAIBABHxA8bHi4uIUFxdnaRsAAEBtceFMAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABg
K4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgKwGHm6uuukpz586VJGVmZqpHjx6Kjo5WamqqjDG+foG2AQAABCKgcPPmm2/qs88+kyS53W4NGjRISUlJysjIUFZWli/0BNoGAAAQKGdtb3DgwAGNGzdOnTp1kiQtWbJEeXl5mjlzpiIiIjR16lSNHj1aI0eODLitOm63W26327ecn58vSfJ4PPJ4PLV9KCdF+bwa6vyCCbW0BnW0DrW0DrW0RrDUsa7nF2Jq+VnQyJEjFR4erpKSEv3ud7/Ttm3btGrVKn3yySeSJGOMWrRooQMHDmjSpEkBtVXn73//uyZNmlRp/VtvvaWIiIjaPAwAAFBPiouLNXz4cOXl5SkqKsry8Wu152bZsmX64osvlJmZqXvvvVfSkb0nCQkJvj4hISFyOBzKzc0NuC06OrrK+58wYYLGjh3rW87Pz1d8fLz69+9fJ8WxgsfjUVpampKTkxUWFlbf0wlq1NIa1NE61NI61NIawVLH8k9e6orf4ebQoUO6++679fzzz1cIEk6nUy6Xq0Lf8PBwFRcXB9xWXbhxuVyVbiNJYWFhDfqXKAXHHIMFtbQGdbQOtbQOtbRGQ69jXc/N7wOKp0yZoh49emjAgAEV1sfExGjfvn0V1hUUFKhRo0YBtwEAAATK7z03b731lvbt26fmzZtLOvJ52bvvvqv27dtXODAoOztbbrdbMTEx6tGjh15++eVatwEAAATK7z03K1euVGZmptauXau1a9fq2muv1eTJk7VixQrl5eVp3rx5kqTp06erX79+cjgc6t27d0BtAAAAgfJ7z83pp59eYblp06aKjY1VbGys5syZo+HDhys1NVVer1fLly8/MrjTGVAbAABAoGp9nptyR59wb8iQIdq4caMyMjLUq1cvtWzZ8oTbAAAAAhFwuDlWXFyc4uLiLG0DAACoLS6cCQAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbCWgcLN//3598803ysnJsXo+AAAAJ6TW4Wb+/Pnq2LGjRo8erTPOOEPz58+XJGVmZqpHjx6Kjo5WamqqjDG+2wTaBgAAUFu1CjcHDx7UX/7yF61cuVJr1qzRiy++qAcffFBut1uDBg1SUlKSMjIylJWVpblz50pSwG0AAACBqFW4KSgo0NNPP60uXbpIks477zzl5uZqyZIlysvL08yZM5WYmKipU6fqlVdekaSA2wAAAALhrE3n+Ph43XLLLZIkj8ejJ598UkOHDtW6devUs2dPRURESJK6deumrKwsSQq4rSput1tut9u3nJ+f75uLx+OpzUM5acrn1VDnF0yopTWoo3WopXWopTWCpY51Pb9ahZty69atU9++fdWoUSNt2LBBU6ZMUUJCgq89JCREDodDubm5ys/PD6gtOjq60v1OmzZNkyZNqrR+6dKlvoDUUKWlpdX3FGyDWlqDOlqHWlqHWlqjodexuLi4TscPKNx069ZNX3zxhcaPH6+RI0fqrLPOksvlqtAnPDxcxcXFcjqdAbVVFW4mTJigsWPH+pbz8/MVHx+v/v37KyoqKpCHUuc8Ho/S0tKUnJyssLCw+p5OUKOW1qCO1qGW1qGW1giWOpZ/8lJXAgo3ISEhOv/88zV37ly1a9dO06ZNU2ZmZoU+BQUFatSokWJiYgJqq4rL5aoUhiQpLCysQf8SpeCYY7CgltagjtahltahltZo6HWs67nV
6oDiL7/8Uqmpqb5lp/NINurcubPS09N967Ozs+V2uxUTE6MePXoE1AYAABCIWoWbzp0768UXX9ScOXO0fft2/fWvf1X//v01YMAA5eXlad68eZKk6dOnq1+/fnI4HOrdu3dAbQAAAIGo1cdSbdu21XvvvacxY8Zo/PjxSklJ0euvvy6n06k5c+Zo+PDhSk1Nldfr1fLly4/cQYBtAAAAgaj1MTcpKSlVfl17yJAh2rhxozIyMtSrVy+1bNnyhNsAAABqK6ADiqsTFxenuLg4S9sAAABqg6uCAwAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAWyHcAAAAW3HW9wSCVXFxsTZs2HDcfoUlbn3z02ZFx2aoaWPXcft37txZERERVkwRAIBTEuEmQBs2bFBSUpLf/R/3s9/q1at1wQUXBDYpAABAuAlU586dtXr16uP2+2XXQY197yfNvKGrOp3W3K9xAQBA4Ag3AYqIiPBrD0votv1yrSzR2V3OU/d2LU7CzAAAOLVxQDEAALAVwg0AALAVwg0AALAVwg0AALAVwg0AALAVwg0AALAVwg0AALCVWoWbjz76SB06dJDT6dTFF1+s9evXS5IyMzPVo0cPRUdHKzU1VcYY320CbQMAAAiE3+Fm8+bNGjlypKZPn64dO3aoXbt2GjVqlNxutwYNGqSkpCRlZGQoKytLc+fOlaSA2wAAAALl9xmK169fr6lTp+r3v/+9JOmee+7RVVddpSVLligvL08zZ85URESEpk6dqtGjR2vkyJEBt1XH7XbL7Xb7lvPz8yVJHo9HHo8n0BrUqdLSUt+/DXWOwaK8ftTxxFBH61BL61BLawRLHet6fn6Hm4EDB1ZY/uWXX9SxY0etW7dOPXv29F3Julu3bsrKypKkgNuqM23aNE2aNKnS+qVLlzbYK2lvL5Qkp9LT07Ujs75nYw9paWn1PQVboI7WoZbWoZbWaOh1LC4urtPxA7q21OHDh/Xkk09qzJgx2rJlixISEnxtISEhcjgcys3NVX5+fkBt0dHRVd7vhAkTNHbsWN9yfn6+4uPj1b9/f0VFRQXyUOrcul8PSD9lqGfPnjrvjJj6nk5Q83g8SktLU3JyssLCwup7OkGLOlqHWlqHWlojWOpY/slLXQko3DzyyCNq2rSp7rrrLj3yyCNyuVwV2sPDw1VcXCyn0xlQW3XhxuVyVbqNJIWFhTXYX6LT6fT921DnGGwa8u87mFBH61BL61BLazT0Otb13Gr9VfC0tDS98MILeuuttxQWFqaYmBjt27evQp+CggI1atQo4DYAAIBA1SrcbNmyRbfccouef/55nXPOOZKkHj16KD093dcnOztbbrdbMTExAbcBAAAEyu9wU1JSooEDB2rIkCEaPHiwCgsLVVhYqMsvv1x5eXmaN2+eJGn69Onq16+fHA6HevfuHVAbAABAoPw+5uazzz7T+vXrtX79er300ku+9Vu3btWcOXM0fPhwpaamyuv1avny5UcGdzoDagMAAAiU3+FmyJAh1Z5BuH379tq4caMyMjLUq1cvtWzZssLtAmkDAAAIREDflqpKXFyc4uLiLG0DAACoLS6cCQAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbIVwAwAAbMVZ3xNoaLbmFKnIXWrZeJv3Ffn+dTqtK3cTl1MJsU0sGw8AALsg3Bxla06R+j75VZ2MPe79nywfc9n43xFwAAA4BuHmKOV7bJ6+sbs6tmpq
zZglbi3+6lsN/N0latLYZcmYm/YW6v531lq6hwkAALsg3FShY6um6hLXzJKxPB6PdreULmgXrbCwMEvGBAAA1eOAYgAAYCuEGwAAYCuEGwAAYCuEGwAAYCuEGwAAYCuEGwAAYCuEGwAAYCuEGwAAYCuEGwAAYCuEGwAAYCuEGwAAYCuEGwAAYCuEGwAAYCtcFRwAUCeKi4u1YcMGv/oWlrj1zU+bFR2boaaNXcft37lzZ0VERJzoFGFThBsAQJ3YsGGDkpKSanWbx/3st3r1al1wwQW1nxROCYQbAECd6Ny5s1avXu1X3192HdTY937SzBu6qtNpzf0aG6gO4QYAUCciIiL83rsSum2/XCtLdHaX89S9XYs6nhnsjgOKAQCArdQ63Ozfv18JCQnKzs72rcvMzFSPHj0UHR2t1NRUGWNOuA0AACAQtQo3OTk5GjhwYIVg43a7NWjQICUlJSkjI0NZWVmaO3fuCbUBAAAEqlbh5qabbtJNN91UYd2SJUuUl5enmTNnKjExUVOnTtUrr7xyQm0AAACBqtUBxXPmzFGHDh10//33+9atW7dOPXv29J1voFu3bsrKyjqhtuq43W653W7fcn5+viTJ4/HI4/HU5qFUqbS01PevFeNJ8o1j1XhS3cwzGNRFLU9F1NE61NI6p+p2zWrB8pys6/nVKtx06NCh0rr8/HwlJCT4lkNCQuRwOJSbmxtwW3R0dJX3P23aNE2aNKnS+qVLl1pyMqfthZLk1H/+8x9ta3rCw1WQlpZm2Vh1Oc9gYGUtT2XU0TrU8sSVb9fS09O1I7O+ZxP8Gvpzsri4uE7HP+GvgjudTrlcFc8mGR4eruLi4oDbqgs3EyZM0NixY33L+fn5io+PV//+/RUVFXWiD0U/78zXkz+l67LLLtO5bU98POlIOk1LS1NycrLCwsIsGbMu5hkM6qKWpyLqaJ1TuZbZ+4tU5PZaNp57d57003q16thV7do0s2TMJi6H2rdoYslYwSJYnpPln7zUlRMONzExMcrMrBizCwoK1KhRo4DbquNyuSoFIkkKCwuz5JfodDp9/1r9pLBqjlLdzjMYWFnLUxl1tM6pVsutOUVKfvrrOhn7gQ/XWzresvG/U0LsqRVwpIb/nKzruZ1wuOnRo4defvll33J2drbcbrdiYmICbgMANFxF7iPHxzx9Y3d1bGXNZ+NFJW4t/upbDfzdJWrix7WljmfT3kLd/85a31xxajnhcNO7d2/l5eVp3rx5uvXWWzV9+nT169dPDocj4DYAQMPXsVVTdYmz5iMkj8ej3S2lC9pFN+g9DggOlhxzM2fOHA0fPlypqanyer1avnz5CbUBAAAEKqBwc+yZhIcMGaKNGzcqIyNDvXr1UsuWLU+4DQAAIBCWXTgzLi5OcXFxlrYBAADUFhfOBAAAtkK4AQAAtkK4AQAAtkK4AQAAtmLZAcUAgFNHiDNfW/N/UWi4NSfxKy0t1c7SnVp/YL3vLOwnYmt+oUKcdXuKfzRchBsAQK2FNV+lh76bavm4sz+dbdlYYc2vlHSNZeMheBBuAAC15jl4sWYMGK5Eiy6/UFpaqq//87UuvexSS/bcbN5bqHvf3GzBzBCMCDcAgFozpVFKiOqkc1pYd/mFrc6tOjvmbEsuv1B2KE+mdJ8FM0Mw4oBiAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK5zEDwBQKyUeryQpc0eeZWMWlbiVsU9qsy1XTRq7Tni8TXsLLZgVghXhBgBQK5v/Lzj8dcFPFo/s1Oubvrd0xCYu/sydivitH6OhX+lW4mq3AOpX/3PbSJISWzVV4zCHJWP+sitP497/STOGdVWn06y5pEMTl1MJsU0sGQvBhXBzjGC40q3E1W4B1J+YJo1000VnWDpmaWmpJCmxZRN1ibMm3ODURbg5RkO/0q3E1W4BAKgJ4eYYDf1KtxJXuwUAoCZ8FRwAABvwer1avny5
VqxYoeXLl8vr9db3lOoN4QYAgCC3YMECdezYUcnJyZo5c6aSk5PVsWNHLViwoL6nVi8INwAABLEFCxZo2LBh6tq1q1auXKm3335bK1euVNeuXTVs2LBTMuAQbgAACFJer1fjxo3TwIEDtXDhQl188cVq3LixLr74Yi1cuFADBw7U+PHjT7mPqAg3AAAEqZUrVyo7O1sPPfSQQkMr/kkPDQ3VhAkTtHXrVq1cubKeZlg/CDcAAASpXbt2SZK6dOlSZXv5+vJ+pwrCDQAAQeq0006TJGVmZlbZXr6+vN+pgnADAECQuvzyy9W+fXtNnTpVZWVlFdrKyso0bdo0JSQk6PLLL6+nGdYPwg0AAEHK4XBoxowZWrx4sYYMGaL09HSVlJQoPT1dQ4YM0eLFi/Xkk0/K4bDmGmDBgjMUAwDqRHFxsTZs2OBX3192HZR79yatz2yssv3Nj9u/c+fOioiIOMEZ2sPQoUP1/vvva9y4cerdu7dvfUJCgt5//30NHTq0HmdXPwg3AIA6sWHDBiUlJdXqNsP/179+q1ev1gUXXBDArOxp6NChGjx4sJYtW6YlS5bo6quvVt++fU+5PTblCDcAgDrRuXNnrV692q++hSVufbzsWw3oe4maNnb5NTYqcjgc6tOnj4qKitSnT59TNthIhBsAQB2JiIjwe++Kx+NRbs5eXXLRhZZdZBinLsIN6lVtPpMvLHHrm582Kzo2w+93dnwmD8AO/N1W1nY7KdlzW0m4Qb0K5DP5x/3sx2fyAILB1pwiFblLa+yT9dNa3Xj17/we09/tpCS9s+QrndO1e419mricSohtUotR6xfhBnXGnxesN+o0vbPkK7/G25ZTqJmfb9LYfh3VLrbpcft7o05T5o68GvsE2wsWgL1k7czXgNkfK8RZUGO/slK34kY/Um374V3/VXHWcnlL/v82z9G4mSLO6aNGp51V49gP/OdHhab/UmMfUxqpL++/Nmi2l4Qb1ImtOUW64ulFx33B1lbj9o31/KYd0iY/Oh/nxSoF3wu2Juy2tk5d1fJUqyOO78ffDiqs+Sq5Wn5x4oNd21JSy2NWZv3fz4lx77tS0rUnPM7JUu/hJjMzUyNHjtSmTZs0atQoPf744woJCanvaeEE5RS6rXvB1iH3vitV5L6mvqdxXOy2toY/dZTqrpb+1FEKjlrCGv3PbaMCz+2KjrpJLmf159V1uw9px/ZfK60vK/PqiUkPq03b0/WHO/+kPfmH9Maq7RpxcbxaR4Xr9Zdma8+uHRo/8R8KDa3621Nx8WfI5QqvcZ5tI1sH1XOyXsON2+3WoEGDlJKSovnz5+vee+/V3LlzNXLkyHqZT4nnyCXhj/dRRm0UlbiVsU9qsy1XTfx8l3w8m/YWWjJOXdq8t1CegxertPCc+p5KjUxppJq46j3j18iq3dYnwg67rf2to1R3tfSnjlLDryWsE9Okke68tPtx+/3www+64do/VNu+a80Orfl4lW/5n89VbB875PZqb7t69WpdcLa9jk+s1636kiVLlJeXp5kzZyoiIkJTp07V6NGjqw03brdbbrfbt5yfny/pyFcIPR7PCc/nv7uOhJq/LvjpuH3LPIfk2f+b32O//J9sv/qFtThdoWE1J+hyLoex5HHXhb5ntdA/BvRUh5ZN1Dis+nMtlJQUK3vzRr/G3JZTqGeWbdV9fRP8OuamfeKZaty45o8AmrgcOr1ZowZbR0las21/0OwFKy29usHWMljqKDX8WtaF8sd6Kj3m2khMTNSqVasqrf/ss880ceJELVu2TBERESosceuzld8r5fIeatrYpaKiIl1xxRWaPHmyUlJSqh37ZNe9ru+vXsPNunXr1LNnT99n0N26dVNWVvWfDU6bNk2TJk2qtH7p0qXWfI7tkW7qEKJWjY0aHeeqW9u3btPjM8ed+H0e44EpMxSfkHjcfi6HlLVquQWfpNadppL27q25z+bNmzVuXO3qOMHPM5jOmDFDiYnHr2VDrqEk
ySNdE3WhIkI6KayG5+WeHdv1v88/VSdTuO2eMWodF19jn+i4yIb9nPSzjlLd1dKfOkpBUMs6lJaWVt9TCCrlh3F899136tSpkySpV9dEFRw8oIKD8h07FhISol27dlU5RnXr61JxcXGdjh9ijDF1eg81GDdunA4dOqTnnvv/+89atmyp//73v4qOjq7Uv6o9N/Hx8crJyVFUVNRJmXO54uJi/fLL8XcvH5uij6dTp06n1AGH/tZRopbHU1fPSYlaVofnpHU8Ho/S0tKUnJzMSfxqwev16uyzz9a5556rDz74QF6v11dHh8Oh66+/XllZWcrKympQZyzOz89XbGys8vLy6uTvd73uuXE6nXK5Km4QwsPDVVxcXGW4cblclfpLUlhY2El/MTRr1kwXXXTRcft5PB4VHDygy3v15AVbBX/rKFHL4+E5aR1qWX/qY3sezMLCwjRjxgwNGzZMN9xwg1JTU1VSUqLVq1friSee0CeffKL3339f4eH+He5wstT177hew01MTIwyMzMrrCsoKFCjRo3qaUYAAAQXrgpeWb2Gmx49eujll1/2LWdnZ8vtdismJqYeZwUAQHDhquAVHeewurrVu3dv5eXlad68eZKk6dOnq1+/fqfsLwMAgECVXxW8d+/eXBW8Xu/c6dScOXM0fPhwpaamyuv1avny5fU5JQAAEOTq/exlQ4YM0caNG5WRkaFevXqpZctjTx0NAADgv3oPN5IUFxenuLi4+p4GAACwgXo95gYAAMBqhBsAAGArhBsAAGArhBsAAGArhBsAAGArhBsAAGArhBsAAGArDeI8N4Eyxkg6cun0hsrj8ai4uFj5+flc6fYEUUtrUEfrUEvrUEtrBEsdy/9ul/8dt1pQh5uCggJJUnx8fD3PBAAA1FZBQYGaNWtm+bghpq5i00lQVlamnTt3KjIyUiEhIfU9nSrl5+crPj5e27dvV1RUVH1PJ6hRS2tQR+tQS+tQS2sESx2NMSooKFDbtm0VGmr9ETJBvecmNDRUp59+en1Pwy9RUVEN+okWTKilNaijdaildailNYKhjnWxx6YcBxQDAABbIdwAAABbIdzUMZfLpUcffVQul6u+pxL0qKU1qKN1qKV1qKU1qOMRQX1AMQAAwLHYcwMAAGyFcAMAAGwlKMINn5xZIyMjQwsXLqyxT/m5g46Vk5Pj9/14vV6tXbtWxcXFtZ1iUPj111+1bdu2+p6GJQ4ePKi8vLz6nkaDtWjRIs2fP9+vvtTy1LJw4cIKv+/du3dr3bp1ft+e7WTdavDhpqCgQOeff742bNgg6cippcvKyir08Xg88ng8xx1r1apVVRbb6/X6dXvpyB9/f/s2NLt27dI999yjw4cPV9tn9uzZSklJ8b3gDh8+LI/Ho169eumdd96RdCRsut3uascoLi7W+eefrx9++MHaB1ALixYt0m233eZb9vc5UlZWppKSEpWWllbb55lnntEdd9xR4zg5OTm67LLLtHfvXv8nfQLy8/MVEhKi008/Xe3bt1f79u3VtGlTRUVF+ZbbtGmj2NjYCrd79NFH9Y9//KPSeG+88YZ+97vfnZS5N2SvvvqqvvjiC7/6UssT89NPP2nr1q0n/X737t2r1NRUeb1eSdJdd92lKVOmSJJ27typO+64Q0VFRRVuk5eXpxtvvFGvvPKKb93jjz+uW265xe8342wn63Y72eDDzcSJE9WuXTt17txZkvTAAw8oJiZGsbGxio2NVYsWLdS8eXNNnz5dkrRy5Up98803Sk9PV3p6ur755hstX75ckjR58mQ98cQTle7jnXfeUXR0tG/M2NhYRUVFyel0VlgXGxurmJgY/elPf6pw+7y8PC1fvlwPPfSQ5s2bZ9ljP/aJd6IGDBiga665psIT6egn188//6zJkyfL5XIpMjJS5513nvr376+//e1vKioq0oMPPqgmTZrowgsvVK9evaq9H6fzyLkhW7dubdnca+Pzzz/Xo48+qscee8y3bvbs
2WrZsqXv99ioUSM1bdq00u83NjZWLVu21LJly6od3+l0HvexxcbG6t5771VKSkqlDWNdKP9mxH/+8x9lZ2crOztbw4YN0x//+Eff8vz589WoUaNKtwsPD/ctX3nllUpPT1fjxo0r9T3VuN1uffnll7rzzjv96k8tj6+oqEgpKSn6+eefK7X985//1N/+9jeVlJT4HRDcbrcvlFSlpKSk0roJEybo+eef9y1HR0fr22+/1ZNPPimp4u9xypQpcrlcatKkSYUx3nzzTUVHR+vOO++U2+1WWVmZHn74Yf3hD3/whYPS0tIa30iynazj7aRpwD788EMTExNjdu7c6fdtbr/9dnPTTTeZfv36mdjYWHPzzTebm2++2Rw4cMBERkaaHTt2+DXOzJkzzW233VZjn7ffftu0adPGtG3b1jRr1sw8/vjj5rvvvjN33nmniYmJMe3atTPt2rUzcXFxpnnz5n4/BmOMSUtLM927dzfbt2/3rZNk1qxZY4wxZsuWLSYqKsq8+uqr1Y7x4YcfGofDcdyfa6+91pxzzjnmtNNOMx999JExxpjhw4ebWbNmmZUrV5pzzjnH7Nmzx+Tm5przzz/fbN68uca5u91uI8n88ssvNfZbsWKFCQ0NNS1atKj2x+l0mtdee82/ohlj9u/fbzp37my2bt1aY7/k5GTz8ssv+z3u0SZMmGBuvvlmv/o+9dRT5o9//GNA91MbHo/HSKrwuG+77TZz3333+ZaXLVtm4uLifMvFxcXmwQcfNPfcc49ZsGCBKS0tNeHh4Wbv3r3mww8/NCkpKcYYY0pLS43H46nzx9AQlJaW+v6/aNEic+aZZ1bb1+v1+v4fjLXcvn276du3r2nSpIm54oorzI4dO8xrr71mJJnQ0FATHx9vUlNTjdvtNsYcef5IMpKM0+k0Xbp0MZ9++qnf9+f1es2VV15pnnrqqUpt+/fvNxEREeann37y3cexP2vXrq10u7vuustER0dXu/1o06ZNpdscPHjQ9OnTx7z66qvmpptuMikpKSYpKcn07t3bpKSkmHbt2plOnTqZlJQU43A4TN++fU1KSorvdXz48GGTmJhoxowZY5544gkjyYSEhBiHw2EkGYfDYUJCQowkc8stt1RbD7aT/19dbCcbbLj57bffTFRUlHn33XfN2rVraxVwjDnyQrz44ot9y7NmzTJhYWG+wBEfH29at25d7e2Tk5PNokWLaryPQ4cOmaKiIrN161aTmJjoW/+Xv/zFPP/8877lrVu3mlatWvk99+qeeOXhprS01FxyySXHfeIsWrTI9OnTp8K6LVu2mC1btviWBw8ebB5++GHz1FNPmWHDhhljjOnUqZPJzs42xcXFxhhjlixZYv7+978bY4wZOXKk+f7772u8399++81IMunp6TX2W7VqVYU/tlXp06ePeeONN2rsc7THHnvMPPnkk8ft16tXL1+Qq61Ro0aZq666yu/+5513nsnOzg7ovvxVHm7atWtnEhMTTWJioomMjDTNmjXzLbdt29ZX771795pWrVqZBx980PzP//yPad26tVm9erVxOp2mRYsWJjIy0oSFhZkWLVqYZs2amWnTptXp/BuKRo0amebNm5sWLVqYsLAwExERUeGPiMvlMk2bNjXR0dHmrLPOMsYEby179+5tRo4caTZv3mzuuOMOc9VVV5nXXnvNdOnSxezevdssXLjQtG3b1txxxx3GmCPb1KioKJObm2t27txp5syZY5o2ber3G8Z//OMfZsyYMVW2PfbYY6Z79+7GGGN+/fVXs2fPHrNv3z6zbds2Ex8fb/785z9b86D/T25uru8N3cqVK82uXbtq/Pn4449N165djTHGPPvss0aSefjhhyuE1Y0bN5qj9xccL8iynazI6u1kgw03ZWVl5oMPPjAej8ece+65ZujQocbj8fjeRRzL7XZXeCIdHW4OHz5s2rdvb7766itjjDFTpkwxY8aMMYWFhb7+c+fO9QWfdu3aGYfDYeLi
4iqsa9eunRk1alSl+ywPN2VlZcbtdpv77ruvUripKUgdq7onXnm4mThxoklMTDR5eXmV+hQWFvpC0ccff2wuuOACk56ebsrKyszhw4fNU089Ze69915f/8GDB5uJEycat9ttwsPDTVJSknE4HOaiiy4yjz76qJk1a5bp0KGDiY2NNZdeeqmJiooyZ555prnzzjurnf/nn39uJJkXX3yxxse5Zs0av160b7/9do19jnbRRReZXbt2Hbdfu3btTFhYmHG5XBV+pkyZctzbXnbZZSYuLs6UlZX5NaepU6eaf/3rX371DVT5u8Dj7blp27atMcaYb7/91vTq1cs8+OCD5tFHHzUDBw40w4cP9z2/j97bcCpyu90mJibGfPfddxXWt2nTxnz44YcV1gVjLbOyskxYWJjJz883xhiTnZ1tJJlnnnnGnHfeeb5+n376qXE6nWb//v1m2bJlplmzZhXG6dKli3nrrbeOe39FRUWmY8eO5tChQ5XacnNzTXR0tLn00ksrtd1///2mb9++FfaoHT2mP3vBSkpKqlz/3nvvmdatW5stW7aY2NhY07JlS9O6desKPzExMSYpKcn89NNPZujQoebnn382kZGRpnv37ubhhx+uMN6x4eZ42E5WZPV2ssFeODMkJERDhw7VzJkzlZOTo6+//lpz587VnXfeqdDQUIWEhMgYo7KyMjkcDnm9Xj3//PPavXu3PB6PkpOTJR05QOr555+Xy+XSGWecIenIUe1t27at8DlqSUmJLrvsMr3xxht65JFHdPrpp+t//ud/tGnTJo0YMULp6emaO3euFi9e7LvNbbfdpo8//th3cFWLFi0UExOjgQMHntBj/+ijj/TRRx9V2fb111/r8ccf14oVKypcFO2mm25SQUGB3G63nE6nGjVqpIKCAm3dulUTJ05Uo0aN9O9//1tJSUmVrsAaEhKiRo0a6bbbbpPT6dTGjRs1adIkde/eXXv27JHX61VaWppmzJihW2+9Vffff7/OPfdc5eTkVDpAVZJWrFihxMREvfbaa7rrrrtOqBa1tW/fPrVp06bGPiUlJdq1a5cOHDigpk2b+tYPGTJEkZGRx71tRkaG4uLitHTpUqWkpBx3Th06dFBGRoZ/DyBAHo9HiYmJGjp0qEJDQ1VWVqaDBw+qcePGuvDCCyUdOQagbdu2kqR169bp7LPP9t1+4cKF6tKli2bNmlWn8wwWH330kTp16qQePXr41q1bt04FBQWVfufBWMvvvvtOiYmJvuf76aefrgcffLDSMSJXXnmlQkJCtHbt2iqv3Ox0Ov06AHX58uW68sorqzxr7iOPPFLhWKVy6enpmjdvntatWyeHw1Gp/YILLtAvv/xy3PuWjrxuj72PwYMHa/z48UpISFB4eLi+/fbbShdiTk9PV2pqqrp06aIPPvhAM2bM0I033ljhcRQVFVU6psrr9crtdisiIqLaObGdrMjq7WSDPqB4zZo1euihh3T99derWbNm+uMf/yiv1yuv16vS0lJ99tlnOu+881RaWiqv16u77rpLxhjfAWZr1qxRu3btFBUVpZtvvtl3YOD69evVoUOHCvdVfnDX8Rzd7+2331Z+fr7uuOMOJSQk6Ndff9WmTZuqfLHXdNDbsWp64qWmpsrtdqtx48aV2s4++2x9/vnn+vTTT+X1ehUTE6Pc3Fx98cUX+vjjj+V0OtWnTx+NGTNGTqdTTqdTixYt8h3xHhUVpS1btqisrEy7d++W2+3Wv//9bxUWFuryyy/XwoULNXToUP36669auHChsrOzK83BGKM33nhDM2fOVH5+vpYuXer347aCy+U67sZ2xYoV6tKlS4UXrHTkm3nR0dE13vadd97ROeeco6lTp2ry5Ml+zamoqKhOr34rSU2aNNGmTZv0ww8/KCMjQ7Nnz9bWrVv11VdfKSMjQxkZGVq7dq2+//57SVJCQoJuuOEG3+1DQ0P1zDPP6JJLLqny24i1ef7aweOPP67rrruuwrrX
X39dQ4cOrfTaC8Za7t69Wy1atPAtOxwOTZ8+vdKblfIvVVT1bZa0tDT98ssvuvTSS497f9u2bau0zZWkzz77TG+99ZYeeeSRCutLS0t155136h//+EelwFHuu+++U0FBgUpKSjR69Gj96U9/UklJiaZMmaLbb79dJSUlKikpUV5eXpXhKSwsTI0bN1ZRUZFcLpcuvfRStW/fXq1bt9Zpp52m9u3b64YbbqgQrMaMGaNnnnmmwjiJiYkKDw/XueeeK0kKDw+Xy+XyLVeF7WRlVm8nG+yemy1btuiaa65Rt27dFBISIklVvnMoV95W3leSzj//fKWnp0s68p37Z555RkVFRVq1apVeeOGFGu9/ypQpevbZZ3X48GHFxMRU22/Pnj2aP3++QkND1aVLF33xxRdyu90aP3687wVbVlZWqw1a+RMvLCysUlvv3r3lcDj06KOP6oMPPqjQdtVVV/n+HxkZqejoaPXv31+fffaZRowYoVtvvVX9+/evcJs33nhDkZGR+uijjzR//nwZY1RSUqJXX31Vzz33nJYuXaqzzz67Qtg6cOCAnnrqKd8egaO9/fbbKi0t1dVXX609e/Zo7NixWr169Um7zknHjh31/fff1/htrv/93//VkCFDKq0/ePBgjS9aj8ejJ598Uvfee6+GDh2qBx54QG+++aZuueWWGue0atWqSnW32nXXXacVK1b4nv/lgfXoPQrlpk+frlGjRkmS79sOISEh6t+/vzp27Kj9+/fL6/Xq0KFDio2NVUlJid577z1dc801dfoYGgq3262rr75as2bN0sKFC3XPPffoiiuu0Msvv6xVq1ZV6l/+uw2mWno8Ht82c+zYsXr11VclyfcV6KOV7yWXjnwztHnz5jp06JDCw8M1e/ZsJSYmHvf+IiMjqzx/1nnnnafZs2dXenP57LPPKjw8XHfffXe1Yx6957qkpEStWrVSeHi4nE6nHA6HL9BUFWzKHT58WBEREdq0aZNv3f3336/TTz9d48ePr9Q/NDS00t6YXbt2KSQkRJs2bdKZZ56pQ4cOSar5/GxsJyuzejvZYPfchIeHa/To0Ro+fLgl451xxhlq1aqVJk6cqNNOO01nnnlmjf3/9re/KTMzU5988kmN/UaPHq0RI0aoSZMmGjt2rB555BG9/PLLys7OVk5OjnJycrR3795andyr/IlXlSlTpmjSpEn68MMPtXbt2hrHWb9+vc4//3zf8rHvIiVpxIgRGjx4sAYPHqyrr77a97HFyJEj9dZbb/k2FmvXrvX99OrVq8qguWfPHt13332aNWuWwsLCdMcddygqKkpjx471+7GfqBEjRmjmzJnVtq9bt06LFi2qcjfwgQMHagyykydPVnh4uEaNGiWn06kXXnhBf/7zn/Xf//632tvs2bNHn3zyiQYMGFC7B1JL7777rvbt2+d7zj3wwAO6/vrrfcvlP+WnPKjOpk2blJubq3nz5umKK65QTk6OioqK6v2P8cnkcrk0efJkZWdna8yYMZo1a5bat2+vhISE4+7KP1pDrmWzZs108OBBSdLDDz+stWvXVvkmzOv1Kicnx/eV3sjISK1du1abN29Wbm6ubr/9dr/ur3Pnzr43mkdr06aNbrrppgrrCgsLNXnyZM2YMaPGN7RH+/nnn4+7TT/Wjh071LhxY4WGhqpDhw7q2LGjOnbsqLlz52r69Om+5cTERIWEhFT5tXKp4htqf9aznaysLraTDTbctG3bttKuykDs3LlTK1eulCSNGjVKzzzzjF/nrZgyZYq6dOlS44ZoypQp2rJli+655x5J0p/+9Ce9+uqr2rNnj84880xt2LBBubm56tSpk+8khP6o6YkXFhamCy+8UAMHDtTEiROrHePQoUNatWqVrr32Wr/uc8+ePVqwYIEOHTqkZs2ayePx6LrrrpPb7dbOnTvVtGlT388HH3xQ6YW7d+9eXXnllRoyZIgGDx4s6ci7nLlz52r+/Pm67777ajzhk1VuuOEG
7d271/dO9GiFhYUaMWKExo0bV+n8C8YY7dmzp8Ku+qM9++yzeu655zR37lzfBveqq67SiBEj1LdvX61Zs6bSbTwej4YPH65HH320xnePVggLC/PNKzc3Vy+88IKuv/76Sv12796tdu3a1elc7MLhcPg+qrjooosUHx+v9u3b69FHHw36MxF3795dv/zyiwoKCtSiRQu1bt1ahYWFlfotX75cISEhSkpKknTkNd2+fXvFxcVV+8e7KklJSdqxY4d+/PHH4/Z96aWX1KVLF/Xu3bvC+gMHDlTZ/9dff9Xq1at15ZVX+j0fSXr66ac1YsQISdIPP/ygTZs2adOmTbr99tv117/+1bdcfk6eqg4FqC22kydvO9lgw00gioqKtHfvXn3zzTeaNm2aMjIylJKSotWrV0s6csI6r9erVq1aHXcsf/bcDB48WO+++65vl6rT6VTjxo1111136Q9/+IM6d+7sO9HTddddV+XGoyo1PfHK/f3vf9fixYurPQBr3bp1SkxMrHG3YzmPx6OBAweqpKREzzzzjMrKynTnnXfqoYceksvlUtu2bVVYWOj7uf766yvscv3ss8/Us2dPdenSRS+++GKFsc866yx98cUXevPNN3XJJZf4TqhYfr+7du2q8gRR5T9ff/11rT7SCw0N1Ztvvqknnniiwom6Dh48qEGDBqlp06b629/+5lufn5+vd955Rw899JCcTqcSEhIqjJeXl6fRo0froYce0ieffKIuXbpUaJ81a5auueYa9ezZUxMnTvT90SsqKtKAAQPUqVMn30dAdS0vL0+vv/66kpKSdPnll+vmm2+u0L5hwwYVFRVVOPahtLTUtzEtKiqqstZlZWUn5USEDUFpaak2bNig1157TTfccIPOO+88de3aVV988YUWLVqktLQ0rVixQh06dPCdsfvo2wZLLS+99FKdddZZuvvuu5Wdna3Jkyf7XtNer1d79uzR4sWLdfvtt2v06NGWHAvx1FNP6ZZbblFBQUGN/d57770q9xhMnz5dt956a6X1DzzwgAYNGlRtaN+wYYO++eabCus+//xzffzxx5VOyHqsr776Sm+//XalA2jLyspUVlamwsJC5eXl6dChQ76DsQ8dOuQ71icnJ8dXV7aTJ3k7adn3rurIE088Ye65555K61evXm2mT59uzj//fN+68ePHm8jISDNs2DDz6quv+s6N89///tdcd911plOnTubf//63admypRk5cmSF8wu8+OKLpmnTpiYxMdFER0ebli1bmsTERHPGGWcYl8tlEhMTTatWrczvf//7SnPZuHGj6dChgzHGmAceeMD07dvXHD582NdeVlZm+vbt6/cJjYw5cq6Hzp07m9mzZ/vW6aiT+Blz5GvcV199tTHGmB9++MHs27fPGGPMjz/+aCIjI01aWpqv75o1a8zevXsr3U9hYaFJTk42N954o3nppZeMMcaEhYWZoqIiY4wxOTk5JiwszCQlJfl+oqOjfedU+PHHH02TJk3M1KlTK5zU7Fjbt283gwcPNmPHjvWtW7lypV9fcazpRIXV2bdvn1m6dKlvedGiRSY5OdkcPHiwQr+ysjJzxRVXmOuuu853qoCj3XzzzaZ79+5mw4YNNd7fa6+9Zjp16uQ7h5DX6zXz58+v9bwDUVxcbFJSUozL5TJXXnmleffddyt8/bK0tNQMHjzYREREmBtvvLHCbe+9914zbtw4Y4wxZ599doXz5JT/tG/f3rRr1+6kPJb6tmLFCtOuXTtz/fXXm9dee80UFBRU2e+VV14xP/zwQ4V1wVbLzZs3m0svvdQ0bdrUXH/99SY+Pt489dRTvpP4JSQkmMcee8z3uq7qq+C1NW3aNHP++eeb3bt3V1j/zjvvmEsuucQYc+R8J//6178qfG18+/btplu3bhW+fu12u83o0aNNy5YtzbZt23zrH3/8cXPFFVf4bp+amlrh78TChQtN+/btzY8//ugbo/ycXsYcOTHg5MmTjTHG/PGP
fzTXXHONWbBgQYX53nHHHWb8+PFm7NixpmnTpqZZs2YVfiIjI43L5TKhoaEmNzeX7eT/OZnbyQYfbqZMmWJuv/32Suv//Oc/m7POOsvMmjXLt+7gwYOVzoMzY8YM43Q6zahRo3zndNi0aZPp3bu3OeOMM0xOTo4xxpjnnnuuxrNJGnPkFzNkyJBK6zMyMkxcXJzZvXu3GTBggDlw4EClPlu2bDH//Oc/a3xiH+vYJ15tHP1irUlVT67ffvutVvPcv3+/3339PedBQ1FUVFTluTmqUp+PbePGjb5wW5UNGzaYjRs3nsQZAVX78MMPfW+eyr3++uvmwgsvNMYcCVGXXXaZiY6O9gWH0047zVx11VW+c7Ps3bvXnHPOOSY+Pr7SmYu/++4707p1a985Wdq2bVthO/rtt99WOJHpycJ28oiT9dhCjLH3JbeLioq0efNmdevWrVLb9u3bFR8fXw+zAgCciIyMDJ155pl1fpoFBCfbhxsAAHBqsdUBxQAAAIQbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK4QbAABgK/8PWzRBa3TF4ZAAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Configure a CJK-capable font so the Chinese column names render in the figure.\n",
    "# SimHei stays first, so the result is unchanged wherever it is installed; the\n",
    "# remaining entries are fallbacks for machines without SimHei, where the labels\n",
    "# would otherwise be drawn as empty boxes.\n",
    "plt.rcParams['font.sans-serif'] = [\n",
    "    'SimHei', 'Microsoft YaHei', 'Arial Unicode MS', 'Noto Sans CJK SC', 'WenQuanYi Micro Hei',\n",
    "]\n",
    "# Use the ASCII hyphen for negative numbers so the minus sign displays correctly.\n",
    "plt.rcParams['axes.unicode_minus'] = False\n",
    "\n",
    "# Outlier detection for the Tianjin data: draw one box per listed column.\n",
    "file_data_tjinfo.boxplot(column=['行政面积（K㎡）', '户籍人口（万人）', '男性', '女性', 'GDP（亿元）', '常住人口（万人）'])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "25fad407-b7f1-460a-84ed-775e1c29baab",
   "metadata": {},
   "source": [
    "从两次输出的箱形图可以看出，这两组数据中都存在异常值。以天津地区信息为例，GDP这列数据中有一个明显高于其他值（大于6000）的数据；与原始的file_data_tjinfo对象对照后发现，这个值是滨海新区的GDP值。滨海新区既是门户港口，又是制造业和转化基地，具有独特的区位特点，GDP明显高于其他地区属于正常情况，因此这个值暂时不作为异常值处理。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "da6eb18c-b002-4920-a78f-acc576b1e58e",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "55c6df4e-d9a7-4a93-ba8b-14e9cd3f29bd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>省级单位</th>\n",
       "      <th>地级单位</th>\n",
       "      <th>县级单位</th>\n",
       "      <th>区划类型</th>\n",
       "      <th>行政面积（K㎡）</th>\n",
       "      <th>户籍人口（万人）</th>\n",
       "      <th>男性</th>\n",
       "      <th>女性</th>\n",
       "      <th>GDP（亿元）</th>\n",
       "      <th>常住人口（万人）</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>西城区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>51</td>\n",
       "      <td>146.47</td>\n",
       "      <td>72.88</td>\n",
       "      <td>73.59</td>\n",
       "      <td>3602.36</td>\n",
       "      <td>125.90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>东城区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>42</td>\n",
       "      <td>97.41</td>\n",
       "      <td>47.91</td>\n",
       "      <td>49.50</td>\n",
       "      <td>2061.80</td>\n",
       "      <td>87.80</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>丰台区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>306</td>\n",
       "      <td>115.33</td>\n",
       "      <td>58.39</td>\n",
       "      <td>56.95</td>\n",
       "      <td>1297.03</td>\n",
       "      <td>225.50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>朝阳区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>455</td>\n",
       "      <td>210.91</td>\n",
       "      <td>105.43</td>\n",
       "      <td>105.48</td>\n",
       "      <td>5171.03</td>\n",
       "      <td>385.60</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>房山区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1990</td>\n",
       "      <td>81.28</td>\n",
       "      <td>40.76</td>\n",
       "      <td>40.52</td>\n",
       "      <td>606.61</td>\n",
       "      <td>109.60</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>石景山区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>84</td>\n",
       "      <td>38.69</td>\n",
       "      <td>19.87</td>\n",
       "      <td>18.82</td>\n",
       "      <td>482.14</td>\n",
       "      <td>63.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>海淀区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>431</td>\n",
       "      <td>240.20</td>\n",
       "      <td>120.08</td>\n",
       "      <td>120.12</td>\n",
       "      <td>5395.16</td>\n",
       "      <td>359.30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>通州区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>906</td>\n",
       "      <td>74.68</td>\n",
       "      <td>37.08</td>\n",
       "      <td>37.60</td>\n",
       "      <td>674.81</td>\n",
       "      <td>142.80</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>顺义区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1020</td>\n",
       "      <td>62.74</td>\n",
       "      <td>31.12</td>\n",
       "      <td>31.61</td>\n",
       "      <td>1591.60</td>\n",
       "      <td>107.50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>昌平区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1344</td>\n",
       "      <td>61.14</td>\n",
       "      <td>30.72</td>\n",
       "      <td>30.41</td>\n",
       "      <td>753.39</td>\n",
       "      <td>201.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>大兴区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1036</td>\n",
       "      <td>68.38</td>\n",
       "      <td>34.02</td>\n",
       "      <td>34.36</td>\n",
       "      <td>1796.95</td>\n",
       "      <td>169.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>门头沟区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1451</td>\n",
       "      <td>25.12</td>\n",
       "      <td>12.80</td>\n",
       "      <td>12.32</td>\n",
       "      <td>157.86</td>\n",
       "      <td>31.10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>怀柔区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>2123</td>\n",
       "      <td>28.29</td>\n",
       "      <td>14.13</td>\n",
       "      <td>14.16</td>\n",
       "      <td>259.41</td>\n",
       "      <td>39.30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>平谷区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>950</td>\n",
       "      <td>40.20</td>\n",
       "      <td>20.22</td>\n",
       "      <td>19.98</td>\n",
       "      <td>218.31</td>\n",
       "      <td>43.70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>密云区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>2229</td>\n",
       "      <td>43.59</td>\n",
       "      <td>21.77</td>\n",
       "      <td>21.82</td>\n",
       "      <td>251.13</td>\n",
       "      <td>48.30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>北京</td>\n",
       "      <td>北京</td>\n",
       "      <td>延庆区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1994</td>\n",
       "      <td>28.42</td>\n",
       "      <td>14.32</td>\n",
       "      <td>14.11</td>\n",
       "      <td>122.66</td>\n",
       "      <td>32.70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>和平区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>10</td>\n",
       "      <td>42.32</td>\n",
       "      <td>20.37</td>\n",
       "      <td>21.95</td>\n",
       "      <td>802.62</td>\n",
       "      <td>35.19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>河东区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>39</td>\n",
       "      <td>75.79</td>\n",
       "      <td>38.06</td>\n",
       "      <td>37.73</td>\n",
       "      <td>290.98</td>\n",
       "      <td>97.61</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>河西区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>37</td>\n",
       "      <td>83.20</td>\n",
       "      <td>40.83</td>\n",
       "      <td>42.37</td>\n",
       "      <td>819.85</td>\n",
       "      <td>99.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>南开区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>39</td>\n",
       "      <td>87.28</td>\n",
       "      <td>43.30</td>\n",
       "      <td>43.98</td>\n",
       "      <td>652.09</td>\n",
       "      <td>114.55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>河北区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>27</td>\n",
       "      <td>63.42</td>\n",
       "      <td>31.86</td>\n",
       "      <td>31.56</td>\n",
       "      <td>415.67</td>\n",
       "      <td>89.24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>红桥区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>21</td>\n",
       "      <td>51.66</td>\n",
       "      <td>25.93</td>\n",
       "      <td>25.73</td>\n",
       "      <td>208.16</td>\n",
       "      <td>56.69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>东丽区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>460</td>\n",
       "      <td>37.70</td>\n",
       "      <td>18.83</td>\n",
       "      <td>18.87</td>\n",
       "      <td>927.08</td>\n",
       "      <td>76.04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>西青区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>545</td>\n",
       "      <td>14.85</td>\n",
       "      <td>19.85</td>\n",
       "      <td>20.38</td>\n",
       "      <td>1040.27</td>\n",
       "      <td>85.37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>津南区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>401</td>\n",
       "      <td>44.83</td>\n",
       "      <td>22.35</td>\n",
       "      <td>22.48</td>\n",
       "      <td>810.16</td>\n",
       "      <td>89.41</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>北辰区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>478</td>\n",
       "      <td>40.39</td>\n",
       "      <td>20.09</td>\n",
       "      <td>20.30</td>\n",
       "      <td>1058.14</td>\n",
       "      <td>98.38</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>武清区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1570</td>\n",
       "      <td>92.27</td>\n",
       "      <td>45.86</td>\n",
       "      <td>46.41</td>\n",
       "      <td>1151.65</td>\n",
       "      <td>119.96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>宝坻区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1523</td>\n",
       "      <td>71.10</td>\n",
       "      <td>35.72</td>\n",
       "      <td>35.39</td>\n",
       "      <td>684.07</td>\n",
       "      <td>92.98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>滨海新区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>2270</td>\n",
       "      <td>128.18</td>\n",
       "      <td>66.04</td>\n",
       "      <td>62.14</td>\n",
       "      <td>6654.00</td>\n",
       "      <td>299.42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>宁河区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1414</td>\n",
       "      <td>40.00</td>\n",
       "      <td>20.21</td>\n",
       "      <td>19.79</td>\n",
       "      <td>525.37</td>\n",
       "      <td>49.57</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>静海区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1476</td>\n",
       "      <td>59.79</td>\n",
       "      <td>30.35</td>\n",
       "      <td>29.44</td>\n",
       "      <td>667.83</td>\n",
       "      <td>79.29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>天津</td>\n",
       "      <td>天津</td>\n",
       "      <td>蓟州区</td>\n",
       "      <td>市辖区</td>\n",
       "      <td>1593</td>\n",
       "      <td>86.24</td>\n",
       "      <td>43.86</td>\n",
       "      <td>42.38</td>\n",
       "      <td>392.55</td>\n",
       "      <td>91.15</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   省级单位 地级单位  县级单位 区划类型  行政面积（K㎡）  户籍人口（万人）      男性      女性  GDP（亿元）  常住人口（万人）\n",
       "0    北京   北京   西城区  市辖区        51    146.47   72.88   73.59  3602.36    125.90\n",
       "1    北京   北京   东城区  市辖区        42     97.41   47.91   49.50  2061.80     87.80\n",
       "2    北京   北京   丰台区  市辖区       306    115.33   58.39   56.95  1297.03    225.50\n",
       "3    北京   北京   朝阳区  市辖区       455    210.91  105.43  105.48  5171.03    385.60\n",
       "4    北京   北京   房山区  市辖区      1990     81.28   40.76   40.52   606.61    109.60\n",
       "5    北京   北京  石景山区  市辖区        84     38.69   19.87   18.82   482.14     63.40\n",
       "6    北京   北京   海淀区  市辖区       431    240.20  120.08  120.12  5395.16    359.30\n",
       "7    北京   北京   通州区  市辖区       906     74.68   37.08   37.60   674.81    142.80\n",
       "8    北京   北京   顺义区  市辖区      1020     62.74   31.12   31.61  1591.60    107.50\n",
       "9    北京   北京   昌平区  市辖区      1344     61.14   30.72   30.41   753.39    201.00\n",
       "10   北京   北京   大兴区  市辖区      1036     68.38   34.02   34.36  1796.95    169.40\n",
       "11   北京   北京  门头沟区  市辖区      1451     25.12   12.80   12.32   157.86     31.10\n",
       "12   北京   北京   怀柔区  市辖区      2123     28.29   14.13   14.16   259.41     39.30\n",
       "13   北京   北京   平谷区  市辖区       950     40.20   20.22   19.98   218.31     43.70\n",
       "14   北京   北京   密云区  市辖区      2229     43.59   21.77   21.82   251.13     48.30\n",
       "15   北京   北京   延庆区  市辖区      1994     28.42   14.32   14.11   122.66     32.70\n",
       "16   天津   天津   和平区  市辖区        10     42.32   20.37   21.95   802.62     35.19\n",
       "17   天津   天津   河东区  市辖区        39     75.79   38.06   37.73   290.98     97.61\n",
       "18   天津   天津   河西区  市辖区        37     83.20   40.83   42.37   819.85     99.25\n",
       "19   天津   天津   南开区  市辖区        39     87.28   43.30   43.98   652.09    114.55\n",
       "20   天津   天津   河北区  市辖区        27     63.42   31.86   31.56   415.67     89.24\n",
       "21   天津   天津   红桥区  市辖区        21     51.66   25.93   25.73   208.16     56.69\n",
       "22   天津   天津   东丽区  市辖区       460     37.70   18.83   18.87   927.08     76.04\n",
       "23   天津   天津   西青区  市辖区       545     14.85   19.85   20.38  1040.27     85.37\n",
       "24   天津   天津   津南区  市辖区       401     44.83   22.35   22.48   810.16     89.41\n",
       "25   天津   天津   北辰区  市辖区       478     40.39   20.09   20.30  1058.14     98.38\n",
       "26   天津   天津   武清区  市辖区      1570     92.27   45.86   46.41  1151.65    119.96\n",
       "27   天津   天津   宝坻区  市辖区      1523     71.10   35.72   35.39   684.07     92.98\n",
       "28   天津   天津  滨海新区  市辖区      2270    128.18   66.04   62.14  6654.00    299.42\n",
       "29   天津   天津   宁河区  市辖区      1414     40.00   20.21   19.79   525.37     49.57\n",
       "30   天津   天津   静海区  市辖区      1476     59.79   30.35   29.44   667.83     79.29\n",
       "31   天津   天津   蓟州区  市辖区      1593     86.24   43.86   42.38   392.55     91.15"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Stack the Beijing and Tianjin tables row-wise into a single frame,\n",
    "# discarding the original indexes so the rows are renumbered from 0.\n",
    "pd.concat(\n",
    "    objs=[file_data_bjinfo, file_data_tjinfo],\n",
    "    axis=0,\n",
    "    ignore_index=True,\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
